copies: Keep changelog sidedata file open during copy tracing...
Simon Sapin
r48256:5fa083a5 default
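This change wraps the sidedata-based copy-tracing call in the changelog's reading context manager, so the changelog sidedata file stays open for the whole copy-tracing pass. A minimal sketch of the pattern introduced by the diff below (names as they appear in copies.py):

    # inside _changesetforwardcopies(), once children_count and revinfo are built
    with repo.changelog.reading():
        return _combine_changeset_copies(
            revs, children_count, b.rev(), revinfo, match, isancestor, multi_thread
        )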
@@ -1,1305 +1,1306
1 1 # coding: utf8
2 2 # copies.py - copy detection for Mercurial
3 3 #
4 4 # Copyright 2008 Olivia Mackall <olivia@selenic.com>
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8
9 9 from __future__ import absolute_import
10 10
11 11 import collections
12 12 import os
13 13
14 14 from .i18n import _
15 15 from .node import nullrev
16 16
17 17 from . import (
18 18 match as matchmod,
19 19 pathutil,
20 20 policy,
21 21 pycompat,
22 22 util,
23 23 )
24 24
25 25
26 26 from .utils import stringutil
27 27
28 28 from .revlogutils import (
29 29 flagutil,
30 30 sidedata as sidedatamod,
31 31 )
32 32
33 33 rustmod = policy.importrust("copy_tracing")
34 34
35 35
36 36 def _filter(src, dst, t):
37 37 """filters out invalid copies after chaining"""
38 38
39 39 # When _chain()'ing copies in 'a' (from 'src' via some other commit 'mid')
40 40 # with copies in 'b' (from 'mid' to 'dst'), we can get the different cases
41 41 # in the following table (not including trivial cases). For example, case 6
42 42 # is where a file existed in 'src' and remained under that name in 'mid' and
43 43 # then was renamed between 'mid' and 'dst'.
44 44 #
45 45 # case src mid dst result
46 46 # 1 x y - -
47 47 # 2 x y y x->y
48 48 # 3 x y x -
49 49 # 4 x y z x->z
50 50 # 5 - x y -
51 51 # 6 x x y x->y
52 52 #
53 53 # _chain() takes care of chaining the copies in 'a' and 'b', but it
54 54 # cannot tell the difference between cases 1 and 2, between 3 and 4, or
55 55 # between 5 and 6, so it includes all cases in its result.
56 56 # Cases 1, 3, and 5 are then removed by _filter().
57 57
58 58 for k, v in list(t.items()):
59 59 if k == v: # case 3
60 60 del t[k]
61 61 elif v not in src: # case 5
62 62 # remove copies from files that didn't exist
63 63 del t[k]
64 64 elif k not in dst: # case 1
65 65 # remove copies to files that were then removed
66 66 del t[k]
67 67
68 68
69 69 def _chain(prefix, suffix):
70 70 """chain two sets of copies 'prefix' and 'suffix'"""
71 71 result = prefix.copy()
72 72 for key, value in pycompat.iteritems(suffix):
73 73 result[key] = prefix.get(value, value)
74 74 return result
75 75
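# Illustrative example (not part of the original module): chaining the copies
# of two consecutive legs and then filtering the stale entries.
#
#     prefix = {b'b': b'a'}      # 'a' copied to 'b' between 'src' and 'mid'
#     suffix = {b'c': b'b'}      # 'b' copied to 'c' between 'mid' and 'dst'
#     _chain(prefix, suffix)     # -> {b'b': b'a', b'c': b'a'}
#
# If 'b' no longer exists in 'dst', _filter(src, dst, t) then drops the stale
# b'b' -> b'a' entry (case 1 in the table above).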
76 76
77 77 def _tracefile(fctx, am, basemf):
78 78 """return file context that is the ancestor of fctx present in ancestor
79 79 manifest am
80 80
81 81 Note: we used to try and stop after a given limit, however checking if that
82 82 limit is reached turned out to be very expensive. we are better off
83 83 disabling that feature."""
84 84
85 85 for f in fctx.ancestors():
86 86 path = f.path()
87 87 if am.get(path, None) == f.filenode():
88 88 return path
89 89 if basemf and basemf.get(path, None) == f.filenode():
90 90 return path
91 91
92 92
93 93 def _dirstatecopies(repo, match=None):
94 94 ds = repo.dirstate
95 95 c = ds.copies().copy()
96 96 for k in list(c):
97 97 if ds[k] not in b'anm' or (match and not match(k)):
98 98 del c[k]
99 99 return c
100 100
101 101
102 102 def _computeforwardmissing(a, b, match=None):
103 103 """Computes which files are in b but not a.
104 104 This is its own function so extensions can easily wrap this call to see what
105 105 files _forwardcopies is about to process.
106 106 """
107 107 ma = a.manifest()
108 108 mb = b.manifest()
109 109 return mb.filesnotin(ma, match=match)
110 110
111 111
112 112 def usechangesetcentricalgo(repo):
113 113 """Checks if we should use changeset-centric copy algorithms"""
114 114 if repo.filecopiesmode == b'changeset-sidedata':
115 115 return True
116 116 readfrom = repo.ui.config(b'experimental', b'copies.read-from')
117 117 changesetsource = (b'changeset-only', b'compatibility')
118 118 return readfrom in changesetsource
119 119
120 120
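# Illustrative configuration (values are examples, not defaults): reading copy
# information from changesets rather than filelogs.
#
#     [experimental]
#     copies.read-from = compatibility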
121 121 def _committedforwardcopies(a, b, base, match):
122 122 """Like _forwardcopies(), but b.rev() cannot be None (working copy)"""
123 123 # files might have to be traced back to the fctx parent of the last
124 124 # one-side-only changeset, but not further back than that
125 125 repo = a._repo
126 126
127 127 if usechangesetcentricalgo(repo):
128 128 return _changesetforwardcopies(a, b, match)
129 129
130 130 debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
131 131 dbg = repo.ui.debug
132 132 if debug:
133 133 dbg(b'debug.copies: looking into rename from %s to %s\n' % (a, b))
134 134 am = a.manifest()
135 135 basemf = None if base is None else base.manifest()
136 136
137 137 # find where new files came from
138 138 # we currently don't try to find where old files went, too expensive
139 139 # this means we can miss a case like 'hg rm b; hg cp a b'
140 140 cm = {}
141 141
142 142 # Computing the forward missing is quite expensive on large manifests, since
143 143 # it compares the entire manifests. We can optimize it in the common use
144 144 # case of computing what copies are in a commit versus its parent (like
145 145 # during a rebase or histedit). Note, we exclude merge commits from this
146 146 # optimization, since the ctx.files() for a merge commit is not correct for
147 147 # this comparison.
148 148 forwardmissingmatch = match
149 149 if b.p1() == a and b.p2().rev() == nullrev:
150 150 filesmatcher = matchmod.exact(b.files())
151 151 forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
152 152 if repo.ui.configbool(b'devel', b'copy-tracing.trace-all-files'):
153 153 missing = list(b.walk(match))
154 154 # _computeforwardmissing(a, b, match=forwardmissingmatch)
155 155 if debug:
156 156 dbg(b'debug.copies: searching all files: %d\n' % len(missing))
157 157 else:
158 158 missing = _computeforwardmissing(a, b, match=forwardmissingmatch)
159 159 if debug:
160 160 dbg(
161 161 b'debug.copies: missing files to search: %d\n'
162 162 % len(missing)
163 163 )
164 164
165 165 ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)
166 166
167 167 for f in sorted(missing):
168 168 if debug:
169 169 dbg(b'debug.copies: tracing file: %s\n' % f)
170 170 fctx = b[f]
171 171 fctx._ancestrycontext = ancestrycontext
172 172
173 173 if debug:
174 174 start = util.timer()
175 175 opath = _tracefile(fctx, am, basemf)
176 176 if opath:
177 177 if debug:
178 178 dbg(b'debug.copies: rename of: %s\n' % opath)
179 179 cm[f] = opath
180 180 if debug:
181 181 dbg(
182 182 b'debug.copies: time: %f seconds\n'
183 183 % (util.timer() - start)
184 184 )
185 185 return cm
186 186
187 187
188 188 def _revinfo_getter(repo, match):
189 189 """returns a function that returns the following data given a <rev>"
190 190
191 191 * p1: revision number of first parent
192 192 * p2: revision number of second parent
193 193 * changes: a ChangingFiles object
194 194 """
195 195 cl = repo.changelog
196 196 parents = cl.parentrevs
197 197 flags = cl.flags
198 198
199 199 HASCOPIESINFO = flagutil.REVIDX_HASCOPIESINFO
200 200
201 201 changelogrevision = cl.changelogrevision
202 202
203 203 if rustmod is not None:
204 204
205 205 def revinfo(rev):
206 206 p1, p2 = parents(rev)
207 207 if flags(rev) & HASCOPIESINFO:
208 208 raw = changelogrevision(rev)._sidedata.get(sidedatamod.SD_FILES)
209 209 else:
210 210 raw = None
211 211 return (p1, p2, raw)
212 212
213 213 else:
214 214
215 215 def revinfo(rev):
216 216 p1, p2 = parents(rev)
217 217 if flags(rev) & HASCOPIESINFO:
218 218 changes = changelogrevision(rev).changes
219 219 else:
220 220 changes = None
221 221 return (p1, p2, changes)
222 222
223 223 return revinfo
224 224
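# Illustrative usage (hypothetical revision number `rev`):
#
#     revinfo = _revinfo_getter(repo, match)
#     p1, p2, changes = revinfo(rev)
#     # `changes` is the raw SD_FILES sidedata (rust path) or a ChangingFiles
#     # object (pure path), or None when the revision carries no copies info.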
225 225
226 226 def cached_is_ancestor(is_ancestor):
227 227 """return a cached version of is_ancestor"""
228 228 cache = {}
229 229
230 230 def _is_ancestor(anc, desc):
231 231 if anc > desc:
232 232 return False
233 233 elif anc == desc:
234 234 return True
235 235 key = (anc, desc)
236 236 ret = cache.get(key)
237 237 if ret is None:
238 238 ret = cache[key] = is_ancestor(anc, desc)
239 239 return ret
240 240
241 241 return _is_ancestor
242 242
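# Illustrative usage (revision numbers are examples):
#
#     isancestor = cached_is_ancestor(repo.changelog.isancestorrev)
#     isancestor(2, 5)   # computed once, then served from the cache
#     isancestor(5, 2)   # False immediately: an ancestor cannot have a higher rev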
243 243
244 244 def _changesetforwardcopies(a, b, match):
245 245 if a.rev() in (nullrev, b.rev()):
246 246 return {}
247 247
248 248 repo = a.repo().unfiltered()
249 249 children = {}
250 250
251 251 cl = repo.changelog
252 252 isancestor = cl.isancestorrev
253 253
254 254 # To track a rename from "A" to "B", we need to gather all parent → child
255 255 # edges that are contained in `::B` but not in `::A`.
256 256 #
257 257 #
258 258 # To do so, we need to gather all revisions exclusive¹ to "B" (ie¹: `::b -
259 259 # ::a`) and also all the "root points", ie the parents of the exclusive set
260 260 # that belong to ::a. These are exactly all the revisions needed to express
261 261 # the parent → child edges we need to combine.
262 262 #
263 263 # [1] actually, we need to gather all the edges within `(::a)::b`, ie:
264 264 # excluding paths that lead to roots that are not ancestors of `a`. We keep
265 265 # this out of the explanation because it is hard enough without this special case.
266 266
267 267 parents = cl._uncheckedparentrevs
268 268 graph_roots = (nullrev, nullrev)
269 269
270 270 ancestors = cl.ancestors([a.rev()], inclusive=True)
271 271 revs = cl.findmissingrevs(common=[a.rev()], heads=[b.rev()])
272 272 roots = set()
273 273 has_graph_roots = False
274 274 multi_thread = repo.ui.configbool(b'devel', b'copy-tracing.multi-thread')
275 275
276 276 # iterate over `only(B, A)`
277 277 for r in revs:
278 278 ps = parents(r)
279 279 if ps == graph_roots:
280 280 has_graph_roots = True
281 281 else:
282 282 p1, p2 = ps
283 283
284 284 # find all the "root points" (see larger comment above)
285 285 if p1 != nullrev and p1 in ancestors:
286 286 roots.add(p1)
287 287 if p2 != nullrev and p2 in ancestors:
288 288 roots.add(p2)
289 289 if not roots:
290 290 # no common revision to track copies from
291 291 return {}
292 292 if has_graph_roots:
293 293 # this deals with the special case mentioned in the [1] footnote. We
294 294 # must filter out revisions that lead to non-common graphroots.
295 295 roots = list(roots)
296 296 m = min(roots)
297 297 h = [b.rev()]
298 298 roots_to_head = cl.reachableroots(m, h, roots, includepath=True)
299 299 roots_to_head = set(roots_to_head)
300 300 revs = [r for r in revs if r in roots_to_head]
301 301
302 302 if repo.filecopiesmode == b'changeset-sidedata':
303 303 # When using side-data, we will process the edges "from" the children.
304 304 # We iterate over the children, gathering previously collected data for
305 305 # the parents. To know when a parent's data is no longer necessary, we
306 306 # keep a counter of how many children each revision has.
307 307 #
308 308 # An interesting property of `children_count` is that it only contains
309 309 # revisions that will be relevant for an edge of the graph. So if a
310 310 # child has a parent not in `children_count`, that edge should not be
311 311 # processed.
312 312 children_count = dict((r, 0) for r in roots)
313 313 for r in revs:
314 314 for p in cl.parentrevs(r):
315 315 if p == nullrev:
316 316 continue
317 317 children_count[r] = 0
318 318 if p in children_count:
319 319 children_count[p] += 1
320 320 revinfo = _revinfo_getter(repo, match)
321 return _combine_changeset_copies(
322 revs,
323 children_count,
324 b.rev(),
325 revinfo,
326 match,
327 isancestor,
328 multi_thread,
329 )
321 with repo.changelog.reading():
322 return _combine_changeset_copies(
323 revs,
324 children_count,
325 b.rev(),
326 revinfo,
327 match,
328 isancestor,
329 multi_thread,
330 )
330 331 else:
331 332 # When not using side-data, we will process the edges "from" the parent,
332 333 # so we need a full mapping of the parent -> children relation.
333 334 children = dict((r, []) for r in roots)
334 335 for r in revs:
335 336 for p in cl.parentrevs(r):
336 337 if p == nullrev:
337 338 continue
338 339 children[r] = []
339 340 if p in children:
340 341 children[p].append(r)
341 342 x = revs.pop()
342 343 assert x == b.rev()
343 344 revs.extend(roots)
344 345 revs.sort()
345 346
346 347 revinfo = _revinfo_getter_extra(repo)
347 348 return _combine_changeset_copies_extra(
348 349 revs, children, b.rev(), revinfo, match, isancestor
349 350 )
350 351
351 352
352 353 def _combine_changeset_copies(
353 354 revs, children_count, targetrev, revinfo, match, isancestor, multi_thread
354 355 ):
355 356 """combine the copies information for each item of iterrevs
356 357
357 358 revs: sorted iterable of revision to visit
358 359 children_count: a {parent: <number-of-relevant-children>} mapping.
359 360 targetrev: the final copies destination revision (not in iterrevs)
360 361 revinfo(rev): a function that return (p1, p2, p1copies, p2copies, removed)
361 362 match: a matcher
362 363
363 364 It returns the aggregated copies information for `targetrev`.
364 365 """
365 366
366 367 alwaysmatch = match.always()
367 368
368 369 if rustmod is not None:
369 370 final_copies = rustmod.combine_changeset_copies(
370 371 list(revs), children_count, targetrev, revinfo, multi_thread
371 372 )
372 373 else:
373 374 isancestor = cached_is_ancestor(isancestor)
374 375
375 376 all_copies = {}
376 377 # iterate over all the "children" side of copy tracing "edge"
377 378 for current_rev in revs:
378 379 p1, p2, changes = revinfo(current_rev)
379 380 current_copies = None
380 381 # iterate over all parents to chain the existing data with the
381 382 # data from the parent → child edge.
382 383 for parent, parent_rev in ((1, p1), (2, p2)):
383 384 if parent_rev == nullrev:
384 385 continue
385 386 remaining_children = children_count.get(parent_rev)
386 387 if remaining_children is None:
387 388 continue
388 389 remaining_children -= 1
389 390 children_count[parent_rev] = remaining_children
390 391 if remaining_children:
391 392 copies = all_copies.get(parent_rev, None)
392 393 else:
393 394 copies = all_copies.pop(parent_rev, None)
394 395
395 396 if copies is None:
396 397 # this is a root
397 398 newcopies = copies = {}
398 399 elif remaining_children:
399 400 newcopies = copies.copy()
400 401 else:
401 402 newcopies = copies
402 403 # chain the data in the edge with the existing data
403 404 if changes is not None:
404 405 childcopies = {}
405 406 if parent == 1:
406 407 childcopies = changes.copied_from_p1
407 408 elif parent == 2:
408 409 childcopies = changes.copied_from_p2
409 410
410 411 if childcopies:
411 412 newcopies = copies.copy()
412 413 for dest, source in pycompat.iteritems(childcopies):
413 414 prev = copies.get(source)
414 415 if prev is not None and prev[1] is not None:
415 416 source = prev[1]
416 417 newcopies[dest] = (current_rev, source)
417 418 assert newcopies is not copies
418 419 if changes.removed:
419 420 for f in changes.removed:
420 421 if f in newcopies:
421 422 if newcopies is copies:
422 423 # copy on write to avoid affecting potential other
423 424 # branches. When there are no other branches, this
424 425 # could be avoided.
425 426 newcopies = copies.copy()
426 427 newcopies[f] = (current_rev, None)
427 428 # check potential need to combine the data from another parent (for
428 429 # that child). See comment below for details.
429 430 if current_copies is None:
430 431 current_copies = newcopies
431 432 else:
432 433 # we are the second parent to work on c, we need to merge our
433 434 # work with the other.
434 435 #
435 436 # In case of conflict, parent 1 takes precedence over parent 2.
436 437 # This is an arbitrary choice made anew when implementing
437 438 # changeset based copies. It was made without regard to
438 439 # potential filelog related behavior.
439 440 assert parent == 2
440 441 current_copies = _merge_copies_dict(
441 442 newcopies,
442 443 current_copies,
443 444 isancestor,
444 445 changes,
445 446 current_rev,
446 447 )
447 448 all_copies[current_rev] = current_copies
448 449
449 450 # filter out internal details and return a {dest: source} mapping
450 451 final_copies = {}
451 452 for dest, (tt, source) in all_copies[targetrev].items():
452 453 if source is not None:
453 454 final_copies[dest] = source
454 455 if not alwaysmatch:
455 456 for filename in list(final_copies.keys()):
456 457 if not match(filename):
457 458 del final_copies[filename]
458 459 return final_copies
459 460
460 461
461 462 # constant to decide which side to pick with _merge_copies_dict
462 463 PICK_MINOR = 0
463 464 PICK_MAJOR = 1
464 465 PICK_EITHER = 2
465 466
466 467
467 468 def _merge_copies_dict(minor, major, isancestor, changes, current_merge):
468 469 """merge two copies-mapping together, minor and major
469 470
470 471 In case of conflict, value from "major" will be picked.
471 472
472 473 - `isancestors(low_rev, high_rev)`: callable returning True if `low_rev` is an
473 474 ancestor of `high_rev`,
474 475
475 476 - `ismerged(path)`: callable returning True if `path` has been merged in the
476 477 current revision,
477 478
478 479 returns the resulting dict (in practice, the "minor" object, updated)
479 480 """
480 481 for dest, value in major.items():
481 482 other = minor.get(dest)
482 483 if other is None:
483 484 minor[dest] = value
484 485 else:
485 486 pick, overwrite = _compare_values(
486 487 changes, isancestor, dest, other, value
487 488 )
488 489 if overwrite:
489 490 if pick == PICK_MAJOR:
490 491 minor[dest] = (current_merge, value[1])
491 492 else:
492 493 minor[dest] = (current_merge, other[1])
493 494 elif pick == PICK_MAJOR:
494 495 minor[dest] = value
495 496 return minor
496 497
497 498
498 499 def _compare_values(changes, isancestor, dest, minor, major):
499 500 """compare two value within a _merge_copies_dict loop iteration
500 501
501 502 return (pick, overwrite).
502 503
503 504 - pick is one of PICK_MINOR, PICK_MAJOR or PICK_EITHER
504 505 - overwrite is True if the pick resolves an ambiguity (the chosen value is then re-attributed to the current merge revision).
505 506 """
506 507 major_tt, major_value = major
507 508 minor_tt, minor_value = minor
508 509
509 510 if major_tt == minor_tt:
510 511 # if it comes from the same revision it must be the same value
511 512 assert major_value == minor_value
512 513 return PICK_EITHER, False
513 514 elif (
514 515 changes is not None
515 516 and minor_value is not None
516 517 and major_value is None
517 518 and dest in changes.salvaged
518 519 ):
519 520 # In this case, a deletion was reverted, the "alive" value overwrites
520 521 # the deleted one.
521 522 return PICK_MINOR, True
522 523 elif (
523 524 changes is not None
524 525 and major_value is not None
525 526 and minor_value is None
526 527 and dest in changes.salvaged
527 528 ):
528 529 # In this case, a deletion was reverted, the "alive" value overwrites
529 530 # the deleted one.
530 531 return PICK_MAJOR, True
531 532 elif isancestor(minor_tt, major_tt):
532 533 if changes is not None and dest in changes.merged:
533 534 # change to dest happened on the branch without copy-source change,
534 535 # so both sources are valid and "major" wins.
535 536 return PICK_MAJOR, True
536 537 else:
537 538 return PICK_MAJOR, False
538 539 elif isancestor(major_tt, minor_tt):
539 540 if changes is not None and dest in changes.merged:
540 541 # change to dest happened on the branch without copy-source change,
541 542 # so both sources are valid and "major" wins.
542 543 return PICK_MAJOR, True
543 544 else:
544 545 return PICK_MINOR, False
545 546 elif minor_value is None:
546 547 # in case of conflict, the "alive" side wins.
547 548 return PICK_MAJOR, True
548 549 elif major_value is None:
549 550 # in case of conflict, the "alive" side wins.
550 551 return PICK_MINOR, True
551 552 else:
552 553 # in case of conflict where both side are alive, major wins.
553 554 return PICK_MAJOR, True
554 555
555 556
556 557 def _revinfo_getter_extra(repo):
557 558 """return a function that return multiple data given a <rev>"i
558 559
559 560 * p1: revision number of first parent
560 561 * p2: revision number of second parent
561 562 * p1copies: mapping of copies from p1
562 563 * p2copies: mapping of copies from p2
563 564 * removed: a list of removed files
564 565 * ismerged: a callback to know if file was merged in that revision
565 566 """
566 567 cl = repo.changelog
567 568 parents = cl.parentrevs
568 569
569 570 def get_ismerged(rev):
570 571 ctx = repo[rev]
571 572
572 573 def ismerged(path):
573 574 if path not in ctx.files():
574 575 return False
575 576 fctx = ctx[path]
576 577 parents = fctx._filelog.parents(fctx._filenode)
577 578 nb_parents = 0
578 579 for n in parents:
579 580 if n != repo.nullid:
580 581 nb_parents += 1
581 582 return nb_parents >= 2
582 583
583 584 return ismerged
584 585
585 586 def revinfo(rev):
586 587 p1, p2 = parents(rev)
587 588 ctx = repo[rev]
588 589 p1copies, p2copies = ctx._copies
589 590 removed = ctx.filesremoved()
590 591 return p1, p2, p1copies, p2copies, removed, get_ismerged(rev)
591 592
592 593 return revinfo
593 594
594 595
595 596 def _combine_changeset_copies_extra(
596 597 revs, children, targetrev, revinfo, match, isancestor
597 598 ):
598 599 """version of `_combine_changeset_copies` that works with the Google
599 600 specific "extra" based storage for copy information"""
600 601 all_copies = {}
601 602 alwaysmatch = match.always()
602 603 for r in revs:
603 604 copies = all_copies.pop(r, None)
604 605 if copies is None:
605 606 # this is a root
606 607 copies = {}
607 608 for i, c in enumerate(children[r]):
608 609 p1, p2, p1copies, p2copies, removed, ismerged = revinfo(c)
609 610 if r == p1:
610 611 parent = 1
611 612 childcopies = p1copies
612 613 else:
613 614 assert r == p2
614 615 parent = 2
615 616 childcopies = p2copies
616 617 if not alwaysmatch:
617 618 childcopies = {
618 619 dst: src for dst, src in childcopies.items() if match(dst)
619 620 }
620 621 newcopies = copies
621 622 if childcopies:
622 623 newcopies = copies.copy()
623 624 for dest, source in pycompat.iteritems(childcopies):
624 625 prev = copies.get(source)
625 626 if prev is not None and prev[1] is not None:
626 627 source = prev[1]
627 628 newcopies[dest] = (c, source)
628 629 assert newcopies is not copies
629 630 for f in removed:
630 631 if f in newcopies:
631 632 if newcopies is copies:
632 633 # copy on write to avoid affecting potential other
633 634 # branches. When there are no other branches, this
634 635 # could be avoided.
635 636 newcopies = copies.copy()
636 637 newcopies[f] = (c, None)
637 638 othercopies = all_copies.get(c)
638 639 if othercopies is None:
639 640 all_copies[c] = newcopies
640 641 else:
641 642 # we are the second parent to work on c, we need to merge our
642 643 # work with the other.
643 644 #
644 645 # In case of conflict, parent 1 takes precedence over parent 2.
645 646 # This is an arbitrary choice made anew when implementing
646 647 # changeset based copies. It was made without regard to
647 648 # potential filelog related behavior.
648 649 if parent == 1:
649 650 _merge_copies_dict_extra(
650 651 othercopies, newcopies, isancestor, ismerged
651 652 )
652 653 else:
653 654 _merge_copies_dict_extra(
654 655 newcopies, othercopies, isancestor, ismerged
655 656 )
656 657 all_copies[c] = newcopies
657 658
658 659 final_copies = {}
659 660 for dest, (tt, source) in all_copies[targetrev].items():
660 661 if source is not None:
661 662 final_copies[dest] = source
662 663 return final_copies
663 664
664 665
665 666 def _merge_copies_dict_extra(minor, major, isancestor, ismerged):
666 667 """version of `_merge_copies_dict` that works with the Google
667 668 specific "extra" based storage for copy information"""
668 669 for dest, value in major.items():
669 670 other = minor.get(dest)
670 671 if other is None:
671 672 minor[dest] = value
672 673 else:
673 674 new_tt = value[0]
674 675 other_tt = other[0]
675 676 if value[1] == other[1]:
676 677 continue
677 678 # content from "major" wins, unless it is older
678 679 # than the branch point or there is a merge
679 680 if (
680 681 new_tt == other_tt
681 682 or not isancestor(new_tt, other_tt)
682 683 or ismerged(dest)
683 684 ):
684 685 minor[dest] = value
685 686
686 687
687 688 def _forwardcopies(a, b, base=None, match=None):
688 689 """find {dst@b: src@a} copy mapping where a is an ancestor of b"""
689 690
690 691 if base is None:
691 692 base = a
692 693 match = a.repo().narrowmatch(match)
693 694 # check for working copy
694 695 if b.rev() is None:
695 696 cm = _committedforwardcopies(a, b.p1(), base, match)
696 697 # combine copies from dirstate if necessary
697 698 copies = _chain(cm, _dirstatecopies(b._repo, match))
698 699 else:
699 700 copies = _committedforwardcopies(a, b, base, match)
700 701 return copies
701 702
702 703
703 704 def _backwardrenames(a, b, match):
704 705 """find renames from a to b"""
705 706 if a._repo.ui.config(b'experimental', b'copytrace') == b'off':
706 707 return {}
707 708
708 709 # We don't want to pass in "match" here, since that would filter
709 710 # the destination by it. Since we're reversing the copies, we want
710 711 # to filter the source instead.
711 712 copies = _forwardcopies(b, a)
712 713 return _reverse_renames(copies, a, match)
713 714
714 715
715 716 def _reverse_renames(copies, dst, match):
716 717 """given copies to context 'dst', finds renames from that context"""
717 718 # Even though we're not taking copies into account, 1:n rename situations
718 719 # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
719 720 # arbitrarily pick one of the renames.
720 721 r = {}
721 722 for k, v in sorted(pycompat.iteritems(copies)):
722 723 if match and not match(v):
723 724 continue
724 725 # remove copies
725 726 if v in dst:
726 727 continue
727 728 r[v] = k
728 729 return r
729 730
730 731
731 732 def pathcopies(x, y, match=None):
732 733 """find {dst@y: src@x} copy mapping for directed compare"""
733 734 repo = x._repo
734 735 debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
735 736 if debug:
736 737 repo.ui.debug(
737 738 b'debug.copies: searching copies from %s to %s\n' % (x, y)
738 739 )
739 740 if x == y or not x or not y:
740 741 return {}
741 742 if y.rev() is None and x == y.p1():
742 743 if debug:
743 744 repo.ui.debug(b'debug.copies: search mode: dirstate\n')
744 745 # short-circuit to avoid issues with merge states
745 746 return _dirstatecopies(repo, match)
746 747 a = y.ancestor(x)
747 748 if a == x:
748 749 if debug:
749 750 repo.ui.debug(b'debug.copies: search mode: forward\n')
750 751 copies = _forwardcopies(x, y, match=match)
751 752 elif a == y:
752 753 if debug:
753 754 repo.ui.debug(b'debug.copies: search mode: backward\n')
754 755 copies = _backwardrenames(x, y, match=match)
755 756 else:
756 757 if debug:
757 758 repo.ui.debug(b'debug.copies: search mode: combined\n')
758 759 base = None
759 760 if a.rev() != nullrev:
760 761 base = x
761 762 x_copies = _forwardcopies(a, x)
762 763 y_copies = _forwardcopies(a, y, base, match=match)
763 764 same_keys = set(x_copies) & set(y_copies)
764 765 for k in same_keys:
765 766 if x_copies.get(k) == y_copies.get(k):
766 767 del x_copies[k]
767 768 del y_copies[k]
768 769 x_backward_renames = _reverse_renames(x_copies, x, match)
769 770 copies = _chain(
770 771 x_backward_renames,
771 772 y_copies,
772 773 )
773 774 _filter(x, y, copies)
774 775 return copies
775 776
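# Illustrative usage (hypothetical contexts): copies between '.' and the
# working copy, returned as a {dst@y: src@x} mapping.
#
#     copies = pathcopies(repo[b'.'], repo[None])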
776 777
777 778 def mergecopies(repo, c1, c2, base):
778 779 """
779 780 Finds moves and copies between context c1 and c2 that are relevant for
780 781 merging. 'base' will be used as the merge base.
781 782
782 783 Copytracing is used in commands like rebase, merge, unshelve, etc to merge
783 784 files that were moved/copied in one merge parent and modified in another.
784 785 For example:
785 786
786 787 o ---> 4 another commit
787 788 |
788 789 | o ---> 3 commit that modifies a.txt
789 790 | /
790 791 o / ---> 2 commit that moves a.txt to b.txt
791 792 |/
792 793 o ---> 1 merge base
793 794
794 795 If we try to rebase revision 3 on revision 4, since there is no a.txt in
795 796 revision 4, and if the user has copytrace disabled, we print the following
796 797 message:
797 798
798 799 ```other changed <file> which local deleted```
799 800
800 801 Returns a tuple where:
801 802
802 803 "branch_copies" an instance of branch_copies.
803 804
804 805 "diverge" is a mapping of source name -> list of destination names
805 806 for divergent renames.
806 807
807 808 This function calls different copytracing algorithms based on config.
808 809 """
809 810 # avoid silly behavior for update from empty dir
810 811 if not c1 or not c2 or c1 == c2:
811 812 return branch_copies(), branch_copies(), {}
812 813
813 814 narrowmatch = c1.repo().narrowmatch()
814 815
815 816 # avoid silly behavior for parent -> working dir
816 817 if c2.node() is None and c1.node() == repo.dirstate.p1():
817 818 return (
818 819 branch_copies(_dirstatecopies(repo, narrowmatch)),
819 820 branch_copies(),
820 821 {},
821 822 )
822 823
823 824 copytracing = repo.ui.config(b'experimental', b'copytrace')
824 825 if stringutil.parsebool(copytracing) is False:
825 826 # stringutil.parsebool() returns None when it is unable to parse the
826 827 # value, so we keep copytracing on in such cases
827 828 return branch_copies(), branch_copies(), {}
828 829
829 830 if usechangesetcentricalgo(repo):
830 831 # The heuristics don't make sense when we need changeset-centric algos
831 832 return _fullcopytracing(repo, c1, c2, base)
832 833
833 834 # Copy trace disabling is explicitly below the node == p1 logic above
834 835 # because the logic above is required for a simple copy to be kept across a
835 836 # rebase.
836 837 if copytracing == b'heuristics':
837 838 # Do full copytracing if only non-public revisions are involved as
838 839 # that will be fast enough and will also cover the copies which could
839 840 # be missed by heuristics
840 841 if _isfullcopytraceable(repo, c1, base):
841 842 return _fullcopytracing(repo, c1, c2, base)
842 843 return _heuristicscopytracing(repo, c1, c2, base)
843 844 else:
844 845 return _fullcopytracing(repo, c1, c2, base)
845 846
846 847
847 848 def _isfullcopytraceable(repo, c1, base):
848 849 """Checks that if base, source and destination are all no-public branches,
849 850 if yes let's use the full copytrace algorithm for increased capabilities
850 851 since it will be fast enough.
851 852
852 853 `experimental.copytrace.sourcecommitlimit` can be used to set a limit for the
853 854 number of changesets from c1 to base; if the number of changesets is
854 855 more than the limit, the full copytracing algorithm won't be used.
855 856 """
856 857 if c1.rev() is None:
857 858 c1 = c1.p1()
858 859 if c1.mutable() and base.mutable():
859 860 sourcecommitlimit = repo.ui.configint(
860 861 b'experimental', b'copytrace.sourcecommitlimit'
861 862 )
862 863 commits = len(repo.revs(b'%d::%d', base.rev(), c1.rev()))
863 864 return commits < sourcecommitlimit
864 865 return False
865 866
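# Illustrative configuration (the limit value is just an example): heuristics
# mode with the full-copytracing escape hatch described above.
#
#     [experimental]
#     copytrace = heuristics
#     copytrace.sourcecommitlimit = 100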
866 867
867 868 def _checksinglesidecopies(
868 869 src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
869 870 ):
870 871 if src not in m2:
871 872 # deleted on side 2
872 873 if src not in m1:
873 874 # renamed on side 1, deleted on side 2
874 875 renamedelete[src] = dsts1
875 876 elif src not in mb:
876 877 # Work around the "short-circuit to avoid issues with merge states"
877 878 # thing in pathcopies(): pathcopies(x, y) can return a copy where the
878 879 # destination doesn't exist in y.
879 880 pass
880 881 elif mb[src] != m2[src] and not _related(c2[src], base[src]):
881 882 return
882 883 elif mb[src] != m2[src] or mb.flags(src) != m2.flags(src):
883 884 # modified on side 2
884 885 for dst in dsts1:
885 886 copy[dst] = src
886 887
887 888
888 889 class branch_copies(object):
889 890 """Information about copies made on one side of a merge/graft.
890 891
891 892 "copy" is a mapping from destination name -> source name,
892 893 where source is in c1 and destination is in c2 or vice-versa.
893 894
894 895 "movewithdir" is a mapping from source name -> destination name,
895 896 where the file at source present in one context but not the other
896 897 needs to be moved to destination by the merge process, because the
897 898 other context moved the directory it is in.
898 899
899 900 "renamedelete" is a mapping of source name -> list of destination
900 901 names for files deleted in c1 that were renamed in c2 or vice-versa.
901 902
902 903 "dirmove" is a mapping of detected source dir -> destination dir renames.
903 904 This is needed for handling changes to new files previously grafted into
904 905 renamed directories.
905 906 """
906 907
907 908 def __init__(
908 909 self, copy=None, renamedelete=None, dirmove=None, movewithdir=None
909 910 ):
910 911 self.copy = {} if copy is None else copy
911 912 self.renamedelete = {} if renamedelete is None else renamedelete
912 913 self.dirmove = {} if dirmove is None else dirmove
913 914 self.movewithdir = {} if movewithdir is None else movewithdir
914 915
915 916 def __repr__(self):
916 917 return '<branch_copies\n copy=%r\n renamedelete=%r\n dirmove=%r\n movewithdir=%r\n>' % (
917 918 self.copy,
918 919 self.renamedelete,
919 920 self.dirmove,
920 921 self.movewithdir,
921 922 )
922 923
923 924
924 925 def _fullcopytracing(repo, c1, c2, base):
925 926 """The full copytracing algorithm which finds all the new files that were
926 927 added from merge base up to the top commit and for each file it checks if
927 928 this file was copied from another file.
928 929
929 930 This is pretty slow when a lot of changesets are involved but will track all
930 931 the copies.
931 932 """
932 933 m1 = c1.manifest()
933 934 m2 = c2.manifest()
934 935 mb = base.manifest()
935 936
936 937 copies1 = pathcopies(base, c1)
937 938 copies2 = pathcopies(base, c2)
938 939
939 940 if not (copies1 or copies2):
940 941 return branch_copies(), branch_copies(), {}
941 942
942 943 inversecopies1 = {}
943 944 inversecopies2 = {}
944 945 for dst, src in copies1.items():
945 946 inversecopies1.setdefault(src, []).append(dst)
946 947 for dst, src in copies2.items():
947 948 inversecopies2.setdefault(src, []).append(dst)
948 949
949 950 copy1 = {}
950 951 copy2 = {}
951 952 diverge = {}
952 953 renamedelete1 = {}
953 954 renamedelete2 = {}
954 955 allsources = set(inversecopies1) | set(inversecopies2)
955 956 for src in allsources:
956 957 dsts1 = inversecopies1.get(src)
957 958 dsts2 = inversecopies2.get(src)
958 959 if dsts1 and dsts2:
959 960 # copied/renamed on both sides
960 961 if src not in m1 and src not in m2:
961 962 # renamed on both sides
962 963 dsts1 = set(dsts1)
963 964 dsts2 = set(dsts2)
964 965 # If there's some overlap in the rename destinations, we
965 966 # consider it not divergent. For example, if side 1 copies 'a'
966 967 # to 'b' and 'c' and deletes 'a', and side 2 copies 'a' to 'c'
967 968 # and 'd' and deletes 'a'.
968 969 if dsts1 & dsts2:
969 970 for dst in dsts1 & dsts2:
970 971 copy1[dst] = src
971 972 copy2[dst] = src
972 973 else:
973 974 diverge[src] = sorted(dsts1 | dsts2)
974 975 elif src in m1 and src in m2:
975 976 # copied on both sides
976 977 dsts1 = set(dsts1)
977 978 dsts2 = set(dsts2)
978 979 for dst in dsts1 & dsts2:
979 980 copy1[dst] = src
980 981 copy2[dst] = src
981 982 # TODO: Handle cases where it was renamed on one side and copied
982 983 # on the other side
983 984 elif dsts1:
984 985 # copied/renamed only on side 1
985 986 _checksinglesidecopies(
986 987 src, dsts1, m1, m2, mb, c2, base, copy1, renamedelete1
987 988 )
988 989 elif dsts2:
989 990 # copied/renamed only on side 2
990 991 _checksinglesidecopies(
991 992 src, dsts2, m2, m1, mb, c1, base, copy2, renamedelete2
992 993 )
993 994
994 995 # find interesting file sets from manifests
995 996 cache = []
996 997
997 998 def _get_addedfiles(idx):
998 999 if not cache:
999 1000 addedinm1 = m1.filesnotin(mb, repo.narrowmatch())
1000 1001 addedinm2 = m2.filesnotin(mb, repo.narrowmatch())
1001 1002 u1 = sorted(addedinm1 - addedinm2)
1002 1003 u2 = sorted(addedinm2 - addedinm1)
1003 1004 cache.extend((u1, u2))
1004 1005 return cache[idx]
1005 1006
1006 1007 u1fn = lambda: _get_addedfiles(0)
1007 1008 u2fn = lambda: _get_addedfiles(1)
1008 1009 if repo.ui.debugflag:
1009 1010 u1 = u1fn()
1010 1011 u2 = u2fn()
1011 1012
1012 1013 header = b" unmatched files in %s"
1013 1014 if u1:
1014 1015 repo.ui.debug(
1015 1016 b"%s:\n %s\n" % (header % b'local', b"\n ".join(u1))
1016 1017 )
1017 1018 if u2:
1018 1019 repo.ui.debug(
1019 1020 b"%s:\n %s\n" % (header % b'other', b"\n ".join(u2))
1020 1021 )
1021 1022
1022 1023 renamedeleteset = set()
1023 1024 divergeset = set()
1024 1025 for dsts in diverge.values():
1025 1026 divergeset.update(dsts)
1026 1027 for dsts in renamedelete1.values():
1027 1028 renamedeleteset.update(dsts)
1028 1029 for dsts in renamedelete2.values():
1029 1030 renamedeleteset.update(dsts)
1030 1031
1031 1032 repo.ui.debug(
1032 1033 b" all copies found (* = to merge, ! = divergent, "
1033 1034 b"% = renamed and deleted):\n"
1034 1035 )
1035 1036 for side, copies in ((b"local", copies1), (b"remote", copies2)):
1036 1037 if not copies:
1037 1038 continue
1038 1039 repo.ui.debug(b" on %s side:\n" % side)
1039 1040 for f in sorted(copies):
1040 1041 note = b""
1041 1042 if f in copy1 or f in copy2:
1042 1043 note += b"*"
1043 1044 if f in divergeset:
1044 1045 note += b"!"
1045 1046 if f in renamedeleteset:
1046 1047 note += b"%"
1047 1048 repo.ui.debug(
1048 1049 b" src: '%s' -> dst: '%s' %s\n" % (copies[f], f, note)
1049 1050 )
1050 1051 del renamedeleteset
1051 1052 del divergeset
1052 1053
1053 1054 repo.ui.debug(b" checking for directory renames\n")
1054 1055
1055 1056 dirmove1, movewithdir2 = _dir_renames(repo, c1, copy1, copies1, u2fn)
1056 1057 dirmove2, movewithdir1 = _dir_renames(repo, c2, copy2, copies2, u1fn)
1057 1058
1058 1059 branch_copies1 = branch_copies(copy1, renamedelete1, dirmove1, movewithdir1)
1059 1060 branch_copies2 = branch_copies(copy2, renamedelete2, dirmove2, movewithdir2)
1060 1061
1061 1062 return branch_copies1, branch_copies2, diverge
1062 1063
1063 1064
1064 1065 def _dir_renames(repo, ctx, copy, fullcopy, addedfilesfn):
1065 1066 """Finds moved directories and files that should move with them.
1066 1067
1067 1068 ctx: the context for one of the sides
1068 1069 copy: files copied on the same side (as ctx)
1069 1070 fullcopy: files copied on the same side (as ctx), including those that
1070 1071 merge.manifestmerge() won't care about
1071 1072 addedfilesfn: function returning added files on the other side (compared to
1072 1073 ctx)
1073 1074 """
1074 1075 # generate a directory move map
1075 1076 invalid = set()
1076 1077 dirmove = {}
1077 1078
1078 1079 # examine each file copy for a potential directory move, which is
1079 1080 # when all the files in a directory are moved to a new directory
1080 1081 for dst, src in pycompat.iteritems(fullcopy):
1081 1082 dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
1082 1083 if dsrc in invalid:
1083 1084 # already seen to be uninteresting
1084 1085 continue
1085 1086 elif ctx.hasdir(dsrc) and ctx.hasdir(ddst):
1086 1087 # directory wasn't entirely moved locally
1087 1088 invalid.add(dsrc)
1088 1089 elif dsrc in dirmove and dirmove[dsrc] != ddst:
1089 1090 # files from the same directory moved to two different places
1090 1091 invalid.add(dsrc)
1091 1092 else:
1092 1093 # looks good so far
1093 1094 dirmove[dsrc] = ddst
1094 1095
1095 1096 for i in invalid:
1096 1097 if i in dirmove:
1097 1098 del dirmove[i]
1098 1099 del invalid
1099 1100
1100 1101 if not dirmove:
1101 1102 return {}, {}
1102 1103
1103 1104 dirmove = {k + b"/": v + b"/" for k, v in pycompat.iteritems(dirmove)}
1104 1105
1105 1106 for d in dirmove:
1106 1107 repo.ui.debug(
1107 1108 b" discovered dir src: '%s' -> dst: '%s'\n" % (d, dirmove[d])
1108 1109 )
1109 1110
1110 1111 # Sort the directories in reverse order, so we find children first
1111 1112 # For example, if dir1/ was renamed to dir2/, and dir1/subdir1/
1112 1113 # was renamed to dir2/subdir2/, we want to move dir1/subdir1/file
1113 1114 # to dir2/subdir2/file (not dir2/subdir1/file)
1114 1115 dirmove_children_first = sorted(dirmove, reverse=True)
1115 1116
1116 1117 movewithdir = {}
1117 1118 # check unaccounted nonoverlapping files against directory moves
1118 1119 for f in addedfilesfn():
1119 1120 if f not in fullcopy:
1120 1121 for d in dirmove_children_first:
1121 1122 if f.startswith(d):
1122 1123 # new file added in a directory that was moved, move it
1123 1124 df = dirmove[d] + f[len(d) :]
1124 1125 if df not in copy:
1125 1126 movewithdir[f] = df
1126 1127 repo.ui.debug(
1127 1128 b" pending file src: '%s' -> dst: '%s'\n"
1128 1129 % (f, df)
1129 1130 )
1130 1131 break
1131 1132
1132 1133 return dirmove, movewithdir
1133 1134
1134 1135
1135 1136 def _heuristicscopytracing(repo, c1, c2, base):
1136 1137 """Fast copytracing using filename heuristics
1137 1138
1138 1139 Assumes that moves or renames are of the following two types:
1139 1140
1140 1141 1) Inside a directory only (same directory name but different filenames)
1141 1142 2) Move from one directory to another
1142 1143 (same filenames but different directory names)
1143 1144
1144 1145 Works only when there are no merge commits in the "source branch".
1145 1146 The source branch is the set of commits from base up to c2, not including base.
1146 1147
1147 1148 If a merge is involved it falls back to _fullcopytracing().
1148 1149
1149 1150 Can be used by setting the following config:
1150 1151
1151 1152 [experimental]
1152 1153 copytrace = heuristics
1153 1154
1154 1155 In some cases the copy/move candidates found by heuristics can be very large
1155 1156 in number and that will make the algorithm slow. The number of possible
1156 1157 candidates to check can be limited by using the config
1157 1158 `experimental.copytrace.movecandidateslimit` which defaults to 100.
1158 1159 """
1159 1160
1160 1161 if c1.rev() is None:
1161 1162 c1 = c1.p1()
1162 1163 if c2.rev() is None:
1163 1164 c2 = c2.p1()
1164 1165
1165 1166 changedfiles = set()
1166 1167 m1 = c1.manifest()
1167 1168 if not repo.revs(b'%d::%d', base.rev(), c2.rev()):
1168 1169 # If base is not in c2 branch, we switch to fullcopytracing
1169 1170 repo.ui.debug(
1170 1171 b"switching to full copytracing as base is not "
1171 1172 b"an ancestor of c2\n"
1172 1173 )
1173 1174 return _fullcopytracing(repo, c1, c2, base)
1174 1175
1175 1176 ctx = c2
1176 1177 while ctx != base:
1177 1178 if len(ctx.parents()) == 2:
1178 1179 # To keep things simple let's not handle merges
1179 1180 repo.ui.debug(b"switching to full copytracing because of merges\n")
1180 1181 return _fullcopytracing(repo, c1, c2, base)
1181 1182 changedfiles.update(ctx.files())
1182 1183 ctx = ctx.p1()
1183 1184
1184 1185 copies2 = {}
1185 1186 cp = _forwardcopies(base, c2)
1186 1187 for dst, src in pycompat.iteritems(cp):
1187 1188 if src in m1:
1188 1189 copies2[dst] = src
1189 1190
1190 1191 # file is missing if it isn't present in the destination, but is present in
1191 1192 # the base and present in the source.
1192 1193 # Presence in the base is important to exclude added files, presence in the
1193 1194 # source is important to exclude removed files.
1194 1195 filt = lambda f: f not in m1 and f in base and f in c2
1195 1196 missingfiles = [f for f in changedfiles if filt(f)]
1196 1197
1197 1198 copies1 = {}
1198 1199 if missingfiles:
1199 1200 basenametofilename = collections.defaultdict(list)
1200 1201 dirnametofilename = collections.defaultdict(list)
1201 1202
1202 1203 for f in m1.filesnotin(base.manifest()):
1203 1204 basename = os.path.basename(f)
1204 1205 dirname = os.path.dirname(f)
1205 1206 basenametofilename[basename].append(f)
1206 1207 dirnametofilename[dirname].append(f)
1207 1208
1208 1209 for f in missingfiles:
1209 1210 basename = os.path.basename(f)
1210 1211 dirname = os.path.dirname(f)
1211 1212 samebasename = basenametofilename[basename]
1212 1213 samedirname = dirnametofilename[dirname]
1213 1214 movecandidates = samebasename + samedirname
1214 1215 # f is guaranteed to be present in c2, that's why
1215 1216 # c2.filectx(f) won't fail
1216 1217 f2 = c2.filectx(f)
1217 1218 # we can have a lot of candidates, which can slow down the heuristics;
1218 1219 # a config value limits the number of candidate moves to check
1219 1220 maxcandidates = repo.ui.configint(
1220 1221 b'experimental', b'copytrace.movecandidateslimit'
1221 1222 )
1222 1223
1223 1224 if len(movecandidates) > maxcandidates:
1224 1225 repo.ui.status(
1225 1226 _(
1226 1227 b"skipping copytracing for '%s', more "
1227 1228 b"candidates than the limit: %d\n"
1228 1229 )
1229 1230 % (f, len(movecandidates))
1230 1231 )
1231 1232 continue
1232 1233
1233 1234 for candidate in movecandidates:
1234 1235 f1 = c1.filectx(candidate)
1235 1236 if _related(f1, f2):
1236 1237 # if there are a few related copies then we'll merge
1237 1238 # changes into all of them. This matches the behaviour
1238 1239 # of upstream copytracing
1239 1240 copies1[candidate] = f
1240 1241
1241 1242 return branch_copies(copies1), branch_copies(copies2), {}
1242 1243
1243 1244
1244 1245 def _related(f1, f2):
1245 1246 """return True if f1 and f2 filectx have a common ancestor
1246 1247
1247 1248 Walk back to common ancestor to see if the two files originate
1248 1249 from the same file. Since workingfilectx's rev() is None it messes
1249 1250 up the integer comparison logic, hence the pre-step check for
1250 1251 None (f1 and f2 can only be workingfilectx's initially).
1251 1252 """
1252 1253
1253 1254 if f1 == f2:
1254 1255 return True # a match
1255 1256
1256 1257 g1, g2 = f1.ancestors(), f2.ancestors()
1257 1258 try:
1258 1259 f1r, f2r = f1.linkrev(), f2.linkrev()
1259 1260
1260 1261 if f1r is None:
1261 1262 f1 = next(g1)
1262 1263 if f2r is None:
1263 1264 f2 = next(g2)
1264 1265
1265 1266 while True:
1266 1267 f1r, f2r = f1.linkrev(), f2.linkrev()
1267 1268 if f1r > f2r:
1268 1269 f1 = next(g1)
1269 1270 elif f2r > f1r:
1270 1271 f2 = next(g2)
1271 1272 else: # f1 and f2 point to files in the same linkrev
1272 1273 return f1 == f2 # true if they point to the same file
1273 1274 except StopIteration:
1274 1275 return False
1275 1276
1276 1277
1277 1278 def graftcopies(wctx, ctx, base):
1278 1279 """reproduce copies between base and ctx in the wctx
1279 1280
1280 1281 Unlike mergecopies(), this function will only consider copies between base
1281 1282 and ctx; it will ignore copies between base and wctx. Also unlike
1282 1283 mergecopies(), this function will apply copies to the working copy (instead
1283 1284 of just returning information about the copies). That makes it cheaper
1284 1285 (especially in the common case of base==ctx.p1()) and useful also when
1285 1286 experimental.copytrace=off.
1286 1287
1287 1288 merge.update() will have already marked most copies, but it will only
1288 1289 mark copies if it thinks the source files are related (see
1289 1290 merge._related()). It will also not mark copies if the file wasn't modified
1290 1291 on the local side. This function adds the copies that were "missed"
1291 1292 by merge.update().
1292 1293 """
1293 1294 new_copies = pathcopies(base, ctx)
1294 1295 parent = wctx.p1()
1295 1296 _filter(parent, wctx, new_copies)
1296 1297 # Extra filtering to drop copy information for files that existed before
1297 1298 # the graft. This is to handle the case of grafting a rename onto a commit
1298 1299 # that already has the rename. Otherwise the presence of copy information
1299 1300 # would result in the creation of an empty commit where we would prefer to
1300 1301 # not create one.
1301 1302 for dest, __ in list(new_copies.items()):
1302 1303 if dest in parent:
1303 1304 del new_copies[dest]
1304 1305 for dst, src in pycompat.iteritems(new_copies):
1305 1306 wctx[dst].markcopied(src)
@@ -1,3293 +1,3300
1 1 # revlog.py - storage back-end for mercurial
2 2 # coding: utf8
3 3 #
4 4 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8
9 9 """Storage back-end for Mercurial.
10 10
11 11 This provides efficient delta storage with O(1) retrieve and append
12 12 and O(changes) merge between branches.
13 13 """
14 14
15 15 from __future__ import absolute_import
16 16
17 17 import binascii
18 18 import collections
19 19 import contextlib
20 20 import errno
21 21 import io
22 22 import os
23 23 import struct
24 24 import zlib
25 25
26 26 # import stuff from node for others to import from revlog
27 27 from .node import (
28 28 bin,
29 29 hex,
30 30 nullrev,
31 31 sha1nodeconstants,
32 32 short,
33 33 wdirrev,
34 34 )
35 35 from .i18n import _
36 36 from .pycompat import getattr
37 37 from .revlogutils.constants import (
38 38 ALL_KINDS,
39 39 CHANGELOGV2,
40 40 COMP_MODE_DEFAULT,
41 41 COMP_MODE_INLINE,
42 42 COMP_MODE_PLAIN,
43 43 FEATURES_BY_VERSION,
44 44 FLAG_GENERALDELTA,
45 45 FLAG_INLINE_DATA,
46 46 INDEX_HEADER,
47 47 KIND_CHANGELOG,
48 48 REVLOGV0,
49 49 REVLOGV1,
50 50 REVLOGV1_FLAGS,
51 51 REVLOGV2,
52 52 REVLOGV2_FLAGS,
53 53 REVLOG_DEFAULT_FLAGS,
54 54 REVLOG_DEFAULT_FORMAT,
55 55 REVLOG_DEFAULT_VERSION,
56 56 SUPPORTED_FLAGS,
57 57 )
58 58 from .revlogutils.flagutil import (
59 59 REVIDX_DEFAULT_FLAGS,
60 60 REVIDX_ELLIPSIS,
61 61 REVIDX_EXTSTORED,
62 62 REVIDX_FLAGS_ORDER,
63 63 REVIDX_HASCOPIESINFO,
64 64 REVIDX_ISCENSORED,
65 65 REVIDX_RAWTEXT_CHANGING_FLAGS,
66 66 )
67 67 from .thirdparty import attr
68 68 from . import (
69 69 ancestor,
70 70 dagop,
71 71 error,
72 72 mdiff,
73 73 policy,
74 74 pycompat,
75 75 revlogutils,
76 76 templatefilters,
77 77 util,
78 78 )
79 79 from .interfaces import (
80 80 repository,
81 81 util as interfaceutil,
82 82 )
83 83 from .revlogutils import (
84 84 censor,
85 85 deltas as deltautil,
86 86 docket as docketutil,
87 87 flagutil,
88 88 nodemap as nodemaputil,
89 89 randomaccessfile,
90 90 revlogv0,
91 91 sidedata as sidedatautil,
92 92 )
93 93 from .utils import (
94 94 storageutil,
95 95 stringutil,
96 96 )
97 97
98 98 # blanked usage of all the names to prevent pyflakes constraints
99 99 # We need these names available in the module for extensions.
100 100
101 101 REVLOGV0
102 102 REVLOGV1
103 103 REVLOGV2
104 104 FLAG_INLINE_DATA
105 105 FLAG_GENERALDELTA
106 106 REVLOG_DEFAULT_FLAGS
107 107 REVLOG_DEFAULT_FORMAT
108 108 REVLOG_DEFAULT_VERSION
109 109 REVLOGV1_FLAGS
110 110 REVLOGV2_FLAGS
111 111 REVIDX_ISCENSORED
112 112 REVIDX_ELLIPSIS
113 113 REVIDX_HASCOPIESINFO
114 114 REVIDX_EXTSTORED
115 115 REVIDX_DEFAULT_FLAGS
116 116 REVIDX_FLAGS_ORDER
117 117 REVIDX_RAWTEXT_CHANGING_FLAGS
118 118
119 119 parsers = policy.importmod('parsers')
120 120 rustancestor = policy.importrust('ancestor')
121 121 rustdagop = policy.importrust('dagop')
122 122 rustrevlog = policy.importrust('revlog')
123 123
124 124 # Aliased for performance.
125 125 _zlibdecompress = zlib.decompress
126 126
127 127 # max size of revlog with inline data
128 128 _maxinline = 131072
129 129
130 130 # Flag processors for REVIDX_ELLIPSIS.
131 131 def ellipsisreadprocessor(rl, text):
132 132 return text, False
133 133
134 134
135 135 def ellipsiswriteprocessor(rl, text):
136 136 return text, False
137 137
138 138
139 139 def ellipsisrawprocessor(rl, text):
140 140 return False
141 141
142 142
143 143 ellipsisprocessor = (
144 144 ellipsisreadprocessor,
145 145 ellipsiswriteprocessor,
146 146 ellipsisrawprocessor,
147 147 )
148 148
149 149
150 150 def _verify_revision(rl, skipflags, state, node):
151 151 """Verify the integrity of the given revlog ``node`` while providing a hook
152 152 point for extensions to influence the operation."""
153 153 if skipflags:
154 154 state[b'skipread'].add(node)
155 155 else:
156 156 # Side-effect: read content and verify hash.
157 157 rl.revision(node)
158 158
159 159
160 160 # True if a fast implementation for persistent-nodemap is available
161 161 #
162 162 # We also consider we have a "fast" implementation in "pure" python because
163 163 # people using pure don't really have performance considerations (and a
164 164 # wheelbarrow of other slowness sources)
165 165 HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or util.safehasattr(
166 166 parsers, 'BaseIndexObject'
167 167 )
168 168
169 169
170 170 @interfaceutil.implementer(repository.irevisiondelta)
171 171 @attr.s(slots=True)
172 172 class revlogrevisiondelta(object):
173 173 node = attr.ib()
174 174 p1node = attr.ib()
175 175 p2node = attr.ib()
176 176 basenode = attr.ib()
177 177 flags = attr.ib()
178 178 baserevisionsize = attr.ib()
179 179 revision = attr.ib()
180 180 delta = attr.ib()
181 181 sidedata = attr.ib()
182 182 protocol_flags = attr.ib()
183 183 linknode = attr.ib(default=None)
184 184
185 185
186 186 @interfaceutil.implementer(repository.iverifyproblem)
187 187 @attr.s(frozen=True)
188 188 class revlogproblem(object):
189 189 warning = attr.ib(default=None)
190 190 error = attr.ib(default=None)
191 191 node = attr.ib(default=None)
192 192
193 193
194 194 def parse_index_v1(data, inline):
195 195 # call the C implementation to parse the index data
196 196 index, cache = parsers.parse_index2(data, inline)
197 197 return index, cache
198 198
199 199
200 200 def parse_index_v2(data, inline):
201 201 # call the C implementation to parse the index data
202 202 index, cache = parsers.parse_index2(data, inline, revlogv2=True)
203 203 return index, cache
204 204
205 205
206 206 def parse_index_cl_v2(data, inline):
207 207 # call the C implementation to parse the index data
208 208 assert not inline
209 209 from .pure.parsers import parse_index_cl_v2
210 210
211 211 index, cache = parse_index_cl_v2(data)
212 212 return index, cache
213 213
214 214
215 215 if util.safehasattr(parsers, 'parse_index_devel_nodemap'):
216 216
217 217 def parse_index_v1_nodemap(data, inline):
218 218 index, cache = parsers.parse_index_devel_nodemap(data, inline)
219 219 return index, cache
220 220
221 221
222 222 else:
223 223 parse_index_v1_nodemap = None
224 224
225 225
226 226 def parse_index_v1_mixed(data, inline):
227 227 index, cache = parse_index_v1(data, inline)
228 228 return rustrevlog.MixedIndex(index), cache
229 229
230 230
231 231 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
232 232 # signed integer)
233 233 _maxentrysize = 0x7FFFFFFF
234 234
235 235 FILE_TOO_SHORT_MSG = _(
236 236 b'cannot read from revlog %s;'
237 237 b' expected %d bytes from offset %d, data size is %d'
238 238 )
239 239
240 240
241 241 class revlog(object):
242 242 """
243 243 the underlying revision storage object
244 244
245 245 A revlog consists of two parts, an index and the revision data.
246 246
247 247 The index is a file with a fixed record size containing
248 248 information on each revision, including its nodeid (hash), the
249 249 nodeids of its parents, the position and offset of its data within
250 250 the data file, and the revision it's based on. Finally, each entry
251 251 contains a linkrev entry that can serve as a pointer to external
252 252 data.
253 253
254 254 The revision data itself is a linear collection of data chunks.
255 255 Each chunk represents a revision and is usually represented as a
256 256 delta against the previous chunk. To bound lookup time, runs of
257 257 deltas are limited to about 2 times the length of the original
258 258 version data. This makes retrieval of a version proportional to
259 259 its size, or O(1) relative to the number of revisions.
260 260
261 261 Both pieces of the revlog are written to in an append-only
262 262 fashion, which means we never need to rewrite a file to insert or
263 263 remove data, and can use some simple techniques to avoid the need
264 264 for locking while reading.
265 265
266 266 If checkambig, indexfile is opened with checkambig=True at
267 267 writing, to avoid file stat ambiguity.
268 268
269 269 If mmaplargeindex is True, and an mmapindexthreshold is set, the
270 270 index will be mmapped rather than read if it is larger than the
271 271 configured threshold.
272 272
273 273 If censorable is True, the revlog can have censored revisions.
274 274
275 275 If `upperboundcomp` is not None, this is the expected maximal gain from
276 276 compression for the data content.
277 277
278 278 `concurrencychecker` is an optional function that receives 3 arguments: a
279 279 file handle, a filename, and an expected position. It should check whether
280 280 the current position in the file handle is valid, and log/warn/fail (by
281 281 raising).
282 282
283 283 See mercurial/revlogutils/constants.py for details about the content of an
284 284 index entry.
285 285 """
286 286
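# Illustrative usage (assuming an existing revlog instance `rl` and a known
# `node`), sketching the read path described in the class docstring:
#
#     r = rl.rev(node)           # index lookup: node -> revision number
#     text = rl.revision(node)   # rebuild the full text from its delta chain
#     p1, p2 = rl.parentrevs(r)  # parent revision numbers from the index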
287 287 _flagserrorclass = error.RevlogError
288 288
289 289 def __init__(
290 290 self,
291 291 opener,
292 292 target,
293 293 radix,
294 294 postfix=None, # only exists for `tmpcensored` now
295 295 checkambig=False,
296 296 mmaplargeindex=False,
297 297 censorable=False,
298 298 upperboundcomp=None,
299 299 persistentnodemap=False,
300 300 concurrencychecker=None,
301 301 trypending=False,
302 302 ):
303 303 """
304 304 create a revlog object
305 305
306 306 opener is a function that abstracts the file opening operation
307 307 and can be used to implement COW semantics or the like.
308 308
309 309         `target`: a (KIND, ID) tuple that identifies the content stored in
310 310         this revlog. It helps the rest of the code understand what the revlog
311 311         is about without having to resort to heuristics and index filename
312 312         analysis. Note that this must reliably be set by normal code, but
313 313         test, debug, or performance measurement code might not set it to an
314 314         accurate value.
315 315 """
316 316 self.upperboundcomp = upperboundcomp
317 317
318 318 self.radix = radix
319 319
320 320 self._docket_file = None
321 321 self._indexfile = None
322 322 self._datafile = None
323 323 self._sidedatafile = None
324 324 self._nodemap_file = None
325 325 self.postfix = postfix
326 326 self._trypending = trypending
327 327 self.opener = opener
328 328 if persistentnodemap:
329 329 self._nodemap_file = nodemaputil.get_nodemap_file(self)
330 330
331 331 assert target[0] in ALL_KINDS
332 332 assert len(target) == 2
333 333 self.target = target
334 334 # When True, indexfile is opened with checkambig=True at writing, to
335 335 # avoid file stat ambiguity.
336 336 self._checkambig = checkambig
337 337 self._mmaplargeindex = mmaplargeindex
338 338 self._censorable = censorable
339 339 # 3-tuple of (node, rev, text) for a raw revision.
340 340 self._revisioncache = None
341 341 # Maps rev to chain base rev.
342 342 self._chainbasecache = util.lrucachedict(100)
343 343 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
344 344 self._chunkcache = (0, b'')
345 345 # How much data to read and cache into the raw revlog data cache.
346 346 self._chunkcachesize = 65536
347 347 self._maxchainlen = None
348 348 self._deltabothparents = True
349 349 self.index = None
350 350 self._docket = None
351 351 self._nodemap_docket = None
352 352 # Mapping of partial identifiers to full nodes.
353 353 self._pcache = {}
354 354 # Mapping of revision integer to full node.
355 355 self._compengine = b'zlib'
356 356 self._compengineopts = {}
357 357 self._maxdeltachainspan = -1
358 358 self._withsparseread = False
359 359 self._sparserevlog = False
360 360 self.hassidedata = False
361 361 self._srdensitythreshold = 0.50
362 362 self._srmingapsize = 262144
363 363
364 364 # Make copy of flag processors so each revlog instance can support
365 365 # custom flags.
366 366 self._flagprocessors = dict(flagutil.flagprocessors)
367 367
368 368 # 3-tuple of file handles being used for active writing.
369 369 self._writinghandles = None
370 370 # prevent nesting of addgroup
371 371 self._adding_group = None
372 372
373 373 self._loadindex()
374 374
375 375 self._concurrencychecker = concurrencychecker
376 376
377 377 def _init_opts(self):
378 378         """process options (from above/config) to set up the associated default revlog mode
379 379
380 380         These values might be affected when actually reading on-disk information.
381 381
382 382 The relevant values are returned for use in _loadindex().
383 383
384 384 * newversionflags:
385 385 version header to use if we need to create a new revlog
386 386
387 387 * mmapindexthreshold:
388 388           minimal index size at which to start using mmap
389 389
390 390 * force_nodemap:
391 391 force the usage of a "development" version of the nodemap code
392 392 """
393 393 mmapindexthreshold = None
394 394 opts = self.opener.options
395 395
396 396 if b'changelogv2' in opts and self.revlog_kind == KIND_CHANGELOG:
397 397 new_header = CHANGELOGV2
398 398 elif b'revlogv2' in opts:
399 399 new_header = REVLOGV2
400 400 elif b'revlogv1' in opts:
401 401 new_header = REVLOGV1 | FLAG_INLINE_DATA
402 402 if b'generaldelta' in opts:
403 403 new_header |= FLAG_GENERALDELTA
404 404 elif b'revlogv0' in self.opener.options:
405 405 new_header = REVLOGV0
406 406 else:
407 407 new_header = REVLOG_DEFAULT_VERSION
408 408
409 409 if b'chunkcachesize' in opts:
410 410 self._chunkcachesize = opts[b'chunkcachesize']
411 411 if b'maxchainlen' in opts:
412 412 self._maxchainlen = opts[b'maxchainlen']
413 413 if b'deltabothparents' in opts:
414 414 self._deltabothparents = opts[b'deltabothparents']
415 415 self._lazydelta = bool(opts.get(b'lazydelta', True))
416 416 self._lazydeltabase = False
417 417 if self._lazydelta:
418 418 self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
419 419 if b'compengine' in opts:
420 420 self._compengine = opts[b'compengine']
421 421 if b'zlib.level' in opts:
422 422 self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
423 423 if b'zstd.level' in opts:
424 424 self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
425 425 if b'maxdeltachainspan' in opts:
426 426 self._maxdeltachainspan = opts[b'maxdeltachainspan']
427 427 if self._mmaplargeindex and b'mmapindexthreshold' in opts:
428 428 mmapindexthreshold = opts[b'mmapindexthreshold']
429 429 self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
430 430 withsparseread = bool(opts.get(b'with-sparse-read', False))
431 431 # sparse-revlog forces sparse-read
432 432 self._withsparseread = self._sparserevlog or withsparseread
433 433 if b'sparse-read-density-threshold' in opts:
434 434 self._srdensitythreshold = opts[b'sparse-read-density-threshold']
435 435 if b'sparse-read-min-gap-size' in opts:
436 436 self._srmingapsize = opts[b'sparse-read-min-gap-size']
437 437 if opts.get(b'enableellipsis'):
438 438 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
439 439
440 440 # revlog v0 doesn't have flag processors
441 441 for flag, processor in pycompat.iteritems(
442 442 opts.get(b'flagprocessors', {})
443 443 ):
444 444 flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
445 445
446 446 if self._chunkcachesize <= 0:
447 447 raise error.RevlogError(
448 448 _(b'revlog chunk cache size %r is not greater than 0')
449 449 % self._chunkcachesize
450 450 )
451 451 elif self._chunkcachesize & (self._chunkcachesize - 1):
452 452 raise error.RevlogError(
453 453 _(b'revlog chunk cache size %r is not a power of 2')
454 454 % self._chunkcachesize
455 455 )
456 456 force_nodemap = opts.get(b'devel-force-nodemap', False)
457 457 return new_header, mmapindexthreshold, force_nodemap
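
    # Example (hypothetical values, for illustration): the opener options
    # consumed above typically look like the dictionary below; only the keys
    # that are present override the defaults set in __init__.
    #
    #   opener.options = {
    #       b'revlogv1': True,
    #       b'generaldelta': True,
    #       b'sparse-revlog': True,
    #       b'chunkcachesize': 65536,
    #       b'mmapindexthreshold': 1000000,
    #   }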
458 458
459 459 def _get_data(self, filepath, mmap_threshold, size=None):
460 460 """return a file content with or without mmap
461 461
462 462         If the file is missing, return an empty string"""
463 463 try:
464 464 with self.opener(filepath) as fp:
465 465 if mmap_threshold is not None:
466 466 file_size = self.opener.fstat(fp).st_size
467 467 if file_size >= mmap_threshold:
468 468 if size is not None:
469 469                             # avoid potential mmap crash
470 470 size = min(file_size, size)
471 471 # TODO: should .close() to release resources without
472 472 # relying on Python GC
473 473 if size is None:
474 474 return util.buffer(util.mmapread(fp))
475 475 else:
476 476 return util.buffer(util.mmapread(fp, size))
477 477 if size is None:
478 478 return fp.read()
479 479 else:
480 480 return fp.read(size)
481 481 except IOError as inst:
482 482 if inst.errno != errno.ENOENT:
483 483 raise
484 484 return b''
485 485
486 486 def _loadindex(self, docket=None):
487 487
488 488 new_header, mmapindexthreshold, force_nodemap = self._init_opts()
489 489
490 490 if self.postfix is not None:
491 491 entry_point = b'%s.i.%s' % (self.radix, self.postfix)
492 492 elif self._trypending and self.opener.exists(b'%s.i.a' % self.radix):
493 493 entry_point = b'%s.i.a' % self.radix
494 494 else:
495 495 entry_point = b'%s.i' % self.radix
496 496
497 497 if docket is not None:
498 498 self._docket = docket
499 499 self._docket_file = entry_point
500 500 else:
501 501 entry_data = b''
502 502 self._initempty = True
503 503 entry_data = self._get_data(entry_point, mmapindexthreshold)
504 504 if len(entry_data) > 0:
505 505 header = INDEX_HEADER.unpack(entry_data[:4])[0]
506 506 self._initempty = False
507 507 else:
508 508 header = new_header
509 509
510 510 self._format_flags = header & ~0xFFFF
511 511 self._format_version = header & 0xFFFF
512 512
513 513 supported_flags = SUPPORTED_FLAGS.get(self._format_version)
514 514 if supported_flags is None:
515 515 msg = _(b'unknown version (%d) in revlog %s')
516 516 msg %= (self._format_version, self.display_id)
517 517 raise error.RevlogError(msg)
518 518 elif self._format_flags & ~supported_flags:
519 519 msg = _(b'unknown flags (%#04x) in version %d revlog %s')
520 520 display_flag = self._format_flags >> 16
521 521 msg %= (display_flag, self._format_version, self.display_id)
522 522 raise error.RevlogError(msg)
523 523
524 524 features = FEATURES_BY_VERSION[self._format_version]
525 525 self._inline = features[b'inline'](self._format_flags)
526 526 self._generaldelta = features[b'generaldelta'](self._format_flags)
527 527 self.hassidedata = features[b'sidedata']
528 528
529 529 if not features[b'docket']:
530 530 self._indexfile = entry_point
531 531 index_data = entry_data
532 532 else:
533 533 self._docket_file = entry_point
534 534 if self._initempty:
535 535 self._docket = docketutil.default_docket(self, header)
536 536 else:
537 537 self._docket = docketutil.parse_docket(
538 538 self, entry_data, use_pending=self._trypending
539 539 )
540 540
541 541 if self._docket is not None:
542 542 self._indexfile = self._docket.index_filepath()
543 543 index_data = b''
544 544 index_size = self._docket.index_end
545 545 if index_size > 0:
546 546 index_data = self._get_data(
547 547 self._indexfile, mmapindexthreshold, size=index_size
548 548 )
549 549 if len(index_data) < index_size:
550 550 msg = _(b'too few index data for %s: got %d, expected %d')
551 551 msg %= (self.display_id, len(index_data), index_size)
552 552 raise error.RevlogError(msg)
553 553
554 554 self._inline = False
555 555 # generaldelta implied by version 2 revlogs.
556 556 self._generaldelta = True
557 557 # the logic for persistent nodemap will be dealt with within the
558 558 # main docket, so disable it for now.
559 559 self._nodemap_file = None
560 560
561 561 if self._docket is not None:
562 562 self._datafile = self._docket.data_filepath()
563 563 self._sidedatafile = self._docket.sidedata_filepath()
564 564 elif self.postfix is None:
565 565 self._datafile = b'%s.d' % self.radix
566 566 else:
567 567 self._datafile = b'%s.d.%s' % (self.radix, self.postfix)
568 568
569 569 self.nodeconstants = sha1nodeconstants
570 570 self.nullid = self.nodeconstants.nullid
571 571
572 572 # sparse-revlog can't be on without general-delta (issue6056)
573 573 if not self._generaldelta:
574 574 self._sparserevlog = False
575 575
576 576 self._storedeltachains = True
577 577
578 578 devel_nodemap = (
579 579 self._nodemap_file
580 580 and force_nodemap
581 581 and parse_index_v1_nodemap is not None
582 582 )
583 583
584 584 use_rust_index = False
585 585 if rustrevlog is not None:
586 586 if self._nodemap_file is not None:
587 587 use_rust_index = True
588 588 else:
589 589 use_rust_index = self.opener.options.get(b'rust.index')
590 590
591 591 self._parse_index = parse_index_v1
592 592 if self._format_version == REVLOGV0:
593 593 self._parse_index = revlogv0.parse_index_v0
594 594 elif self._format_version == REVLOGV2:
595 595 self._parse_index = parse_index_v2
596 596 elif self._format_version == CHANGELOGV2:
597 597 self._parse_index = parse_index_cl_v2
598 598 elif devel_nodemap:
599 599 self._parse_index = parse_index_v1_nodemap
600 600 elif use_rust_index:
601 601 self._parse_index = parse_index_v1_mixed
602 602 try:
603 603 d = self._parse_index(index_data, self._inline)
604 604 index, chunkcache = d
605 605 use_nodemap = (
606 606 not self._inline
607 607 and self._nodemap_file is not None
608 608 and util.safehasattr(index, 'update_nodemap_data')
609 609 )
610 610 if use_nodemap:
611 611 nodemap_data = nodemaputil.persisted_data(self)
612 612 if nodemap_data is not None:
613 613 docket = nodemap_data[0]
614 614 if (
615 615 len(d[0]) > docket.tip_rev
616 616 and d[0][docket.tip_rev][7] == docket.tip_node
617 617 ):
618 618 # no changelog tampering
619 619 self._nodemap_docket = docket
620 620 index.update_nodemap_data(*nodemap_data)
621 621 except (ValueError, IndexError):
622 622 raise error.RevlogError(
623 623 _(b"index %s is corrupted") % self.display_id
624 624 )
625 625 self.index = index
626 626 self._segmentfile = randomaccessfile.randomaccessfile(
627 627 self.opener,
628 628 (self._indexfile if self._inline else self._datafile),
629 629 self._chunkcachesize,
630 630 chunkcache,
631 631 )
632 632 self._segmentfile_sidedata = randomaccessfile.randomaccessfile(
633 633 self.opener,
634 634 self._sidedatafile,
635 635 self._chunkcachesize,
636 636 )
637 637 # revnum -> (chain-length, sum-delta-length)
638 638 self._chaininfocache = util.lrucachedict(500)
639 639 # revlog header -> revlog compressor
640 640 self._decompressors = {}
641 641
642 642 @util.propertycache
643 643 def revlog_kind(self):
644 644 return self.target[0]
645 645
646 646 @util.propertycache
647 647 def display_id(self):
648 648         """The public facing "ID" of the revlog that we use in messages"""
649 649 # Maybe we should build a user facing representation of
650 650 # revlog.target instead of using `self.radix`
651 651 return self.radix
652 652
653 653 def _get_decompressor(self, t):
654 654 try:
655 655 compressor = self._decompressors[t]
656 656 except KeyError:
657 657 try:
658 658 engine = util.compengines.forrevlogheader(t)
659 659 compressor = engine.revlogcompressor(self._compengineopts)
660 660 self._decompressors[t] = compressor
661 661 except KeyError:
662 662 raise error.RevlogError(
663 663 _(b'unknown compression type %s') % binascii.hexlify(t)
664 664 )
665 665 return compressor
666 666
667 667 @util.propertycache
668 668 def _compressor(self):
669 669 engine = util.compengines[self._compengine]
670 670 return engine.revlogcompressor(self._compengineopts)
671 671
672 672 @util.propertycache
673 673 def _decompressor(self):
674 674 """the default decompressor"""
675 675 if self._docket is None:
676 676 return None
677 677 t = self._docket.default_compression_header
678 678 c = self._get_decompressor(t)
679 679 return c.decompress
680 680
681 681 def _indexfp(self):
682 682 """file object for the revlog's index file"""
683 683 return self.opener(self._indexfile, mode=b"r")
684 684
685 685 def __index_write_fp(self):
686 686         # You should not use this directly; use `_writing` instead
687 687 try:
688 688 f = self.opener(
689 689 self._indexfile, mode=b"r+", checkambig=self._checkambig
690 690 )
691 691 if self._docket is None:
692 692 f.seek(0, os.SEEK_END)
693 693 else:
694 694 f.seek(self._docket.index_end, os.SEEK_SET)
695 695 return f
696 696 except IOError as inst:
697 697 if inst.errno != errno.ENOENT:
698 698 raise
699 699 return self.opener(
700 700 self._indexfile, mode=b"w+", checkambig=self._checkambig
701 701 )
702 702
703 703 def __index_new_fp(self):
704 704 # You should not use this unless you are upgrading from inline revlog
705 705 return self.opener(
706 706 self._indexfile,
707 707 mode=b"w",
708 708 checkambig=self._checkambig,
709 709 atomictemp=True,
710 710 )
711 711
712 712 def _datafp(self, mode=b'r'):
713 713 """file object for the revlog's data file"""
714 714 return self.opener(self._datafile, mode=mode)
715 715
716 716 @contextlib.contextmanager
717 717 def _sidedatareadfp(self):
718 718 """file object suitable to read sidedata"""
719 719 if self._writinghandles:
720 720 yield self._writinghandles[2]
721 721 else:
722 722 with self.opener(self._sidedatafile) as fp:
723 723 yield fp
724 724
725 725 def tiprev(self):
726 726 return len(self.index) - 1
727 727
728 728 def tip(self):
729 729 return self.node(self.tiprev())
730 730
731 731 def __contains__(self, rev):
732 732 return 0 <= rev < len(self)
733 733
734 734 def __len__(self):
735 735 return len(self.index)
736 736
737 737 def __iter__(self):
738 738 return iter(pycompat.xrange(len(self)))
739 739
740 740 def revs(self, start=0, stop=None):
741 741 """iterate over all rev in this revlog (from start to stop)"""
742 742 return storageutil.iterrevs(len(self), start=start, stop=stop)
743 743
744 744 @property
745 745 def nodemap(self):
746 746 msg = (
747 747 b"revlog.nodemap is deprecated, "
748 748 b"use revlog.index.[has_node|rev|get_rev]"
749 749 )
750 750 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
751 751 return self.index.nodemap
752 752
753 753 @property
754 754 def _nodecache(self):
755 755 msg = b"revlog._nodecache is deprecated, use revlog.index.nodemap"
756 756 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
757 757 return self.index.nodemap
758 758
759 759 def hasnode(self, node):
760 760 try:
761 761 self.rev(node)
762 762 return True
763 763 except KeyError:
764 764 return False
765 765
766 766 def candelta(self, baserev, rev):
767 767 """whether two revisions (baserev, rev) can be delta-ed or not"""
768 768 # Disable delta if either rev requires a content-changing flag
769 769 # processor (ex. LFS). This is because such flag processor can alter
770 770 # the rawtext content that the delta will be based on, and two clients
771 771 # could have a same revlog node with different flags (i.e. different
772 772 # rawtext contents) and the delta could be incompatible.
773 773 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
774 774 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
775 775 ):
776 776 return False
777 777 return True
778 778
779 779 def update_caches(self, transaction):
780 780 if self._nodemap_file is not None:
781 781 if transaction is None:
782 782 nodemaputil.update_persistent_nodemap(self)
783 783 else:
784 784 nodemaputil.setup_persistent_nodemap(transaction, self)
785 785
786 786 def clearcaches(self):
787 787 self._revisioncache = None
788 788 self._chainbasecache.clear()
789 789 self._segmentfile.clear_cache()
790 790 self._segmentfile_sidedata.clear_cache()
791 791 self._pcache = {}
792 792 self._nodemap_docket = None
793 793 self.index.clearcaches()
794 794         # The Python code is responsible for validating the docket, so we
795 795         # end up having to refresh it here.
796 796 use_nodemap = (
797 797 not self._inline
798 798 and self._nodemap_file is not None
799 799 and util.safehasattr(self.index, 'update_nodemap_data')
800 800 )
801 801 if use_nodemap:
802 802 nodemap_data = nodemaputil.persisted_data(self)
803 803 if nodemap_data is not None:
804 804 self._nodemap_docket = nodemap_data[0]
805 805 self.index.update_nodemap_data(*nodemap_data)
806 806
807 807 def rev(self, node):
808 808 try:
809 809 return self.index.rev(node)
810 810 except TypeError:
811 811 raise
812 812 except error.RevlogError:
813 813 # parsers.c radix tree lookup failed
814 814 if (
815 815 node == self.nodeconstants.wdirid
816 816 or node in self.nodeconstants.wdirfilenodeids
817 817 ):
818 818 raise error.WdirUnsupported
819 819 raise error.LookupError(node, self.display_id, _(b'no node'))
820 820
821 821 # Accessors for index entries.
822 822
823 823 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
824 824 # are flags.
825 825 def start(self, rev):
826 826 return int(self.index[rev][0] >> 16)
827 827
828 828 def sidedata_cut_off(self, rev):
829 829 sd_cut_off = self.index[rev][8]
830 830 if sd_cut_off != 0:
831 831 return sd_cut_off
832 832 # This is some annoying dance, because entries without sidedata
833 833         # currently use 0 as their offset (instead of previous-offset +
834 834         # previous-size).
835 835 #
836 836         # We should reconsider this sidedata → 0 sidedata_offset policy.
837 837 # In the meantime, we need this.
838 838 while 0 <= rev:
839 839 e = self.index[rev]
840 840 if e[9] != 0:
841 841 return e[8] + e[9]
842 842 rev -= 1
843 843 return 0
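
    # Worked example (hypothetical values): if the (sidedata_offset,
    # sidedata_size) pairs of revisions 0..2 are (10, 5), (0, 0), (0, 0),
    # then sidedata_cut_off(2) walks backwards to revision 0 and returns
    # 10 + 5 = 15 instead of the misleading 0 stored in the entry itself.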
844 844
845 845 def flags(self, rev):
846 846 return self.index[rev][0] & 0xFFFF
847 847
848 848 def length(self, rev):
849 849 return self.index[rev][1]
850 850
851 851 def sidedata_length(self, rev):
852 852 if not self.hassidedata:
853 853 return 0
854 854 return self.index[rev][9]
855 855
856 856 def rawsize(self, rev):
857 857 """return the length of the uncompressed text for a given revision"""
858 858 l = self.index[rev][2]
859 859 if l >= 0:
860 860 return l
861 861
862 862 t = self.rawdata(rev)
863 863 return len(t)
864 864
865 865 def size(self, rev):
866 866 """length of non-raw text (processed by a "read" flag processor)"""
867 867 # fast path: if no "read" flag processor could change the content,
868 868 # size is rawsize. note: ELLIPSIS is known to not change the content.
869 869 flags = self.flags(rev)
870 870 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
871 871 return self.rawsize(rev)
872 872
873 873 return len(self.revision(rev, raw=False))
874 874
875 875 def chainbase(self, rev):
876 876 base = self._chainbasecache.get(rev)
877 877 if base is not None:
878 878 return base
879 879
880 880 index = self.index
881 881 iterrev = rev
882 882 base = index[iterrev][3]
883 883 while base != iterrev:
884 884 iterrev = base
885 885 base = index[iterrev][3]
886 886
887 887 self._chainbasecache[rev] = base
888 888 return base
889 889
890 890 def linkrev(self, rev):
891 891 return self.index[rev][4]
892 892
893 893 def parentrevs(self, rev):
894 894 try:
895 895 entry = self.index[rev]
896 896 except IndexError:
897 897 if rev == wdirrev:
898 898 raise error.WdirUnsupported
899 899 raise
900 900 if entry[5] == nullrev:
901 901 return entry[6], entry[5]
902 902 else:
903 903 return entry[5], entry[6]
904 904
905 905 # fast parentrevs(rev) where rev isn't filtered
906 906 _uncheckedparentrevs = parentrevs
907 907
908 908 def node(self, rev):
909 909 try:
910 910 return self.index[rev][7]
911 911 except IndexError:
912 912 if rev == wdirrev:
913 913 raise error.WdirUnsupported
914 914 raise
915 915
916 916 # Derived from index values.
917 917
918 918 def end(self, rev):
919 919 return self.start(rev) + self.length(rev)
920 920
921 921 def parents(self, node):
922 922 i = self.index
923 923 d = i[self.rev(node)]
924 924 # inline node() to avoid function call overhead
925 925 if d[5] == self.nullid:
926 926 return i[d[6]][7], i[d[5]][7]
927 927 else:
928 928 return i[d[5]][7], i[d[6]][7]
929 929
930 930 def chainlen(self, rev):
931 931 return self._chaininfo(rev)[0]
932 932
933 933 def _chaininfo(self, rev):
934 934 chaininfocache = self._chaininfocache
935 935 if rev in chaininfocache:
936 936 return chaininfocache[rev]
937 937 index = self.index
938 938 generaldelta = self._generaldelta
939 939 iterrev = rev
940 940 e = index[iterrev]
941 941 clen = 0
942 942 compresseddeltalen = 0
943 943 while iterrev != e[3]:
944 944 clen += 1
945 945 compresseddeltalen += e[1]
946 946 if generaldelta:
947 947 iterrev = e[3]
948 948 else:
949 949 iterrev -= 1
950 950 if iterrev in chaininfocache:
951 951 t = chaininfocache[iterrev]
952 952 clen += t[0]
953 953 compresseddeltalen += t[1]
954 954 break
955 955 e = index[iterrev]
956 956 else:
957 957 # Add text length of base since decompressing that also takes
958 958 # work. For cache hits the length is already included.
959 959 compresseddeltalen += e[1]
960 960 r = (clen, compresseddeltalen)
961 961 chaininfocache[rev] = r
962 962 return r
963 963
964 964 def _deltachain(self, rev, stoprev=None):
965 965 """Obtain the delta chain for a revision.
966 966
967 967 ``stoprev`` specifies a revision to stop at. If not specified, we
968 968 stop at the base of the chain.
969 969
970 970 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
971 971 revs in ascending order and ``stopped`` is a bool indicating whether
972 972 ``stoprev`` was hit.
973 973 """
974 974 # Try C implementation.
975 975 try:
976 976 return self.index.deltachain(rev, stoprev, self._generaldelta)
977 977 except AttributeError:
978 978 pass
979 979
980 980 chain = []
981 981
982 982 # Alias to prevent attribute lookup in tight loop.
983 983 index = self.index
984 984 generaldelta = self._generaldelta
985 985
986 986 iterrev = rev
987 987 e = index[iterrev]
988 988 while iterrev != e[3] and iterrev != stoprev:
989 989 chain.append(iterrev)
990 990 if generaldelta:
991 991 iterrev = e[3]
992 992 else:
993 993 iterrev -= 1
994 994 e = index[iterrev]
995 995
996 996 if iterrev == stoprev:
997 997 stopped = True
998 998 else:
999 999 chain.append(iterrev)
1000 1000 stopped = False
1001 1001
1002 1002 chain.reverse()
1003 1003 return chain, stopped
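
    # Sketch (hypothetical, for illustration): turning a delta chain into a
    # fulltext, mirroring what _rawtext() does further below with the result
    # of this method. `rl` and `rev` are placeholders.
    #
    #   chain, stopped = rl._deltachain(rev)
    #   bins = rl._chunks(chain)
    #   text = mdiff.patches(bytes(bins[0]), bins[1:])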
1004 1004
1005 1005 def ancestors(self, revs, stoprev=0, inclusive=False):
1006 1006 """Generate the ancestors of 'revs' in reverse revision order.
1007 1007 Does not generate revs lower than stoprev.
1008 1008
1009 1009 See the documentation for ancestor.lazyancestors for more details."""
1010 1010
1011 1011 # first, make sure start revisions aren't filtered
1012 1012 revs = list(revs)
1013 1013 checkrev = self.node
1014 1014 for r in revs:
1015 1015 checkrev(r)
1016 1016 # and we're sure ancestors aren't filtered as well
1017 1017
1018 1018 if rustancestor is not None and self.index.rust_ext_compat:
1019 1019 lazyancestors = rustancestor.LazyAncestors
1020 1020 arg = self.index
1021 1021 else:
1022 1022 lazyancestors = ancestor.lazyancestors
1023 1023 arg = self._uncheckedparentrevs
1024 1024 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
1025 1025
1026 1026 def descendants(self, revs):
1027 1027 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
1028 1028
1029 1029 def findcommonmissing(self, common=None, heads=None):
1030 1030 """Return a tuple of the ancestors of common and the ancestors of heads
1031 1031 that are not ancestors of common. In revset terminology, we return the
1032 1032 tuple:
1033 1033
1034 1034 ::common, (::heads) - (::common)
1035 1035
1036 1036 The list is sorted by revision number, meaning it is
1037 1037 topologically sorted.
1038 1038
1039 1039 'heads' and 'common' are both lists of node IDs. If heads is
1040 1040 not supplied, uses all of the revlog's heads. If common is not
1041 1041 supplied, uses nullid."""
1042 1042 if common is None:
1043 1043 common = [self.nullid]
1044 1044 if heads is None:
1045 1045 heads = self.heads()
1046 1046
1047 1047 common = [self.rev(n) for n in common]
1048 1048 heads = [self.rev(n) for n in heads]
1049 1049
1050 1050 # we want the ancestors, but inclusive
1051 1051 class lazyset(object):
1052 1052 def __init__(self, lazyvalues):
1053 1053 self.addedvalues = set()
1054 1054 self.lazyvalues = lazyvalues
1055 1055
1056 1056 def __contains__(self, value):
1057 1057 return value in self.addedvalues or value in self.lazyvalues
1058 1058
1059 1059 def __iter__(self):
1060 1060 added = self.addedvalues
1061 1061 for r in added:
1062 1062 yield r
1063 1063 for r in self.lazyvalues:
1064 1064 if not r in added:
1065 1065 yield r
1066 1066
1067 1067 def add(self, value):
1068 1068 self.addedvalues.add(value)
1069 1069
1070 1070 def update(self, values):
1071 1071 self.addedvalues.update(values)
1072 1072
1073 1073 has = lazyset(self.ancestors(common))
1074 1074 has.add(nullrev)
1075 1075 has.update(common)
1076 1076
1077 1077 # take all ancestors from heads that aren't in has
1078 1078 missing = set()
1079 1079 visit = collections.deque(r for r in heads if r not in has)
1080 1080 while visit:
1081 1081 r = visit.popleft()
1082 1082 if r in missing:
1083 1083 continue
1084 1084 else:
1085 1085 missing.add(r)
1086 1086 for p in self.parentrevs(r):
1087 1087 if p not in has:
1088 1088 visit.append(p)
1089 1089 missing = list(missing)
1090 1090 missing.sort()
1091 1091 return has, [self.node(miss) for miss in missing]
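
    # Usage sketch (hypothetical): `has` behaves like a set of the ancestor
    # revisions of `common`, and `missing` lists, in topological order, the
    # nodes reachable from `heads` but not from `common`, i.e.
    # ``(::heads) - (::common)``.
    #
    #   has, missing = rl.findcommonmissing(common=[c], heads=[h1, h2])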
1092 1092
1093 1093 def incrementalmissingrevs(self, common=None):
1094 1094 """Return an object that can be used to incrementally compute the
1095 1095 revision numbers of the ancestors of arbitrary sets that are not
1096 1096 ancestors of common. This is an ancestor.incrementalmissingancestors
1097 1097 object.
1098 1098
1099 1099 'common' is a list of revision numbers. If common is not supplied, uses
1100 1100 nullrev.
1101 1101 """
1102 1102 if common is None:
1103 1103 common = [nullrev]
1104 1104
1105 1105 if rustancestor is not None and self.index.rust_ext_compat:
1106 1106 return rustancestor.MissingAncestors(self.index, common)
1107 1107 return ancestor.incrementalmissingancestors(self.parentrevs, common)
1108 1108
1109 1109 def findmissingrevs(self, common=None, heads=None):
1110 1110 """Return the revision numbers of the ancestors of heads that
1111 1111 are not ancestors of common.
1112 1112
1113 1113 More specifically, return a list of revision numbers corresponding to
1114 1114 nodes N such that every N satisfies the following constraints:
1115 1115
1116 1116 1. N is an ancestor of some node in 'heads'
1117 1117 2. N is not an ancestor of any node in 'common'
1118 1118
1119 1119 The list is sorted by revision number, meaning it is
1120 1120 topologically sorted.
1121 1121
1122 1122 'heads' and 'common' are both lists of revision numbers. If heads is
1123 1123 not supplied, uses all of the revlog's heads. If common is not
1124 1124         supplied, uses nullrev."""
1125 1125 if common is None:
1126 1126 common = [nullrev]
1127 1127 if heads is None:
1128 1128 heads = self.headrevs()
1129 1129
1130 1130 inc = self.incrementalmissingrevs(common=common)
1131 1131 return inc.missingancestors(heads)
1132 1132
1133 1133 def findmissing(self, common=None, heads=None):
1134 1134 """Return the ancestors of heads that are not ancestors of common.
1135 1135
1136 1136 More specifically, return a list of nodes N such that every N
1137 1137 satisfies the following constraints:
1138 1138
1139 1139 1. N is an ancestor of some node in 'heads'
1140 1140 2. N is not an ancestor of any node in 'common'
1141 1141
1142 1142 The list is sorted by revision number, meaning it is
1143 1143 topologically sorted.
1144 1144
1145 1145 'heads' and 'common' are both lists of node IDs. If heads is
1146 1146 not supplied, uses all of the revlog's heads. If common is not
1147 1147 supplied, uses nullid."""
1148 1148 if common is None:
1149 1149 common = [self.nullid]
1150 1150 if heads is None:
1151 1151 heads = self.heads()
1152 1152
1153 1153 common = [self.rev(n) for n in common]
1154 1154 heads = [self.rev(n) for n in heads]
1155 1155
1156 1156 inc = self.incrementalmissingrevs(common=common)
1157 1157 return [self.node(r) for r in inc.missingancestors(heads)]
1158 1158
1159 1159 def nodesbetween(self, roots=None, heads=None):
1160 1160 """Return a topological path from 'roots' to 'heads'.
1161 1161
1162 1162 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
1163 1163 topologically sorted list of all nodes N that satisfy both of
1164 1164 these constraints:
1165 1165
1166 1166 1. N is a descendant of some node in 'roots'
1167 1167 2. N is an ancestor of some node in 'heads'
1168 1168
1169 1169 Every node is considered to be both a descendant and an ancestor
1170 1170 of itself, so every reachable node in 'roots' and 'heads' will be
1171 1171 included in 'nodes'.
1172 1172
1173 1173 'outroots' is the list of reachable nodes in 'roots', i.e., the
1174 1174 subset of 'roots' that is returned in 'nodes'. Likewise,
1175 1175 'outheads' is the subset of 'heads' that is also in 'nodes'.
1176 1176
1177 1177 'roots' and 'heads' are both lists of node IDs. If 'roots' is
1178 1178 unspecified, uses nullid as the only root. If 'heads' is
1179 1179 unspecified, uses list of all of the revlog's heads."""
1180 1180 nonodes = ([], [], [])
1181 1181 if roots is not None:
1182 1182 roots = list(roots)
1183 1183 if not roots:
1184 1184 return nonodes
1185 1185 lowestrev = min([self.rev(n) for n in roots])
1186 1186 else:
1187 1187 roots = [self.nullid] # Everybody's a descendant of nullid
1188 1188 lowestrev = nullrev
1189 1189 if (lowestrev == nullrev) and (heads is None):
1190 1190 # We want _all_ the nodes!
1191 1191 return (
1192 1192 [self.node(r) for r in self],
1193 1193 [self.nullid],
1194 1194 list(self.heads()),
1195 1195 )
1196 1196 if heads is None:
1197 1197 # All nodes are ancestors, so the latest ancestor is the last
1198 1198 # node.
1199 1199 highestrev = len(self) - 1
1200 1200 # Set ancestors to None to signal that every node is an ancestor.
1201 1201 ancestors = None
1202 1202 # Set heads to an empty dictionary for later discovery of heads
1203 1203 heads = {}
1204 1204 else:
1205 1205 heads = list(heads)
1206 1206 if not heads:
1207 1207 return nonodes
1208 1208 ancestors = set()
1209 1209 # Turn heads into a dictionary so we can remove 'fake' heads.
1210 1210 # Also, later we will be using it to filter out the heads we can't
1211 1211 # find from roots.
1212 1212 heads = dict.fromkeys(heads, False)
1213 1213 # Start at the top and keep marking parents until we're done.
1214 1214 nodestotag = set(heads)
1215 1215 # Remember where the top was so we can use it as a limit later.
1216 1216 highestrev = max([self.rev(n) for n in nodestotag])
1217 1217 while nodestotag:
1218 1218 # grab a node to tag
1219 1219 n = nodestotag.pop()
1220 1220 # Never tag nullid
1221 1221 if n == self.nullid:
1222 1222 continue
1223 1223 # A node's revision number represents its place in a
1224 1224 # topologically sorted list of nodes.
1225 1225 r = self.rev(n)
1226 1226 if r >= lowestrev:
1227 1227 if n not in ancestors:
1228 1228 # If we are possibly a descendant of one of the roots
1229 1229 # and we haven't already been marked as an ancestor
1230 1230 ancestors.add(n) # Mark as ancestor
1231 1231 # Add non-nullid parents to list of nodes to tag.
1232 1232 nodestotag.update(
1233 1233 [p for p in self.parents(n) if p != self.nullid]
1234 1234 )
1235 1235 elif n in heads: # We've seen it before, is it a fake head?
1236 1236 # So it is, real heads should not be the ancestors of
1237 1237 # any other heads.
1238 1238 heads.pop(n)
1239 1239 if not ancestors:
1240 1240 return nonodes
1241 1241 # Now that we have our set of ancestors, we want to remove any
1242 1242 # roots that are not ancestors.
1243 1243
1244 1244 # If one of the roots was nullid, everything is included anyway.
1245 1245 if lowestrev > nullrev:
1246 1246 # But, since we weren't, let's recompute the lowest rev to not
1247 1247 # include roots that aren't ancestors.
1248 1248
1249 1249 # Filter out roots that aren't ancestors of heads
1250 1250 roots = [root for root in roots if root in ancestors]
1251 1251 # Recompute the lowest revision
1252 1252 if roots:
1253 1253 lowestrev = min([self.rev(root) for root in roots])
1254 1254 else:
1255 1255 # No more roots? Return empty list
1256 1256 return nonodes
1257 1257 else:
1258 1258 # We are descending from nullid, and don't need to care about
1259 1259 # any other roots.
1260 1260 lowestrev = nullrev
1261 1261 roots = [self.nullid]
1262 1262 # Transform our roots list into a set.
1263 1263 descendants = set(roots)
1264 1264 # Also, keep the original roots so we can filter out roots that aren't
1265 1265 # 'real' roots (i.e. are descended from other roots).
1266 1266 roots = descendants.copy()
1267 1267 # Our topologically sorted list of output nodes.
1268 1268 orderedout = []
1269 1269 # Don't start at nullid since we don't want nullid in our output list,
1270 1270 # and if nullid shows up in descendants, empty parents will look like
1271 1271 # they're descendants.
1272 1272 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1273 1273 n = self.node(r)
1274 1274 isdescendant = False
1275 1275 if lowestrev == nullrev: # Everybody is a descendant of nullid
1276 1276 isdescendant = True
1277 1277 elif n in descendants:
1278 1278 # n is already a descendant
1279 1279 isdescendant = True
1280 1280 # This check only needs to be done here because all the roots
1281 1281                 # will start being marked as descendants before the loop.
1282 1282 if n in roots:
1283 1283 # If n was a root, check if it's a 'real' root.
1284 1284 p = tuple(self.parents(n))
1285 1285 # If any of its parents are descendants, it's not a root.
1286 1286 if (p[0] in descendants) or (p[1] in descendants):
1287 1287 roots.remove(n)
1288 1288 else:
1289 1289 p = tuple(self.parents(n))
1290 1290                 # A node is a descendant if either of its parents is a
1291 1291                 # descendant. (We seeded the descendants set with the roots
1292 1292                 # up there, remember?)
1293 1293 if (p[0] in descendants) or (p[1] in descendants):
1294 1294 descendants.add(n)
1295 1295 isdescendant = True
1296 1296 if isdescendant and ((ancestors is None) or (n in ancestors)):
1297 1297 # Only include nodes that are both descendants and ancestors.
1298 1298 orderedout.append(n)
1299 1299 if (ancestors is not None) and (n in heads):
1300 1300 # We're trying to figure out which heads are reachable
1301 1301 # from roots.
1302 1302 # Mark this head as having been reached
1303 1303 heads[n] = True
1304 1304 elif ancestors is None:
1305 1305 # Otherwise, we're trying to discover the heads.
1306 1306 # Assume this is a head because if it isn't, the next step
1307 1307 # will eventually remove it.
1308 1308 heads[n] = True
1309 1309 # But, obviously its parents aren't.
1310 1310 for p in self.parents(n):
1311 1311 heads.pop(p, None)
1312 1312 heads = [head for head, flag in pycompat.iteritems(heads) if flag]
1313 1313 roots = list(roots)
1314 1314 assert orderedout
1315 1315 assert roots
1316 1316 assert heads
1317 1317 return (orderedout, roots, heads)
1318 1318
1319 1319 def headrevs(self, revs=None):
1320 1320 if revs is None:
1321 1321 try:
1322 1322 return self.index.headrevs()
1323 1323 except AttributeError:
1324 1324 return self._headrevs()
1325 1325 if rustdagop is not None and self.index.rust_ext_compat:
1326 1326 return rustdagop.headrevs(self.index, revs)
1327 1327 return dagop.headrevs(revs, self._uncheckedparentrevs)
1328 1328
1329 1329 def computephases(self, roots):
1330 1330 return self.index.computephasesmapsets(roots)
1331 1331
1332 1332 def _headrevs(self):
1333 1333 count = len(self)
1334 1334 if not count:
1335 1335 return [nullrev]
1336 1336 # we won't iter over filtered rev so nobody is a head at start
1337 1337 ishead = [0] * (count + 1)
1338 1338 index = self.index
1339 1339 for r in self:
1340 1340             ishead[r] = 1  # I may be a head
1341 1341 e = index[r]
1342 1342             ishead[e[5]] = ishead[e[6]] = 0  # my parents are not
1343 1343 return [r for r, val in enumerate(ishead) if val]
1344 1344
1345 1345 def heads(self, start=None, stop=None):
1346 1346 """return the list of all nodes that have no children
1347 1347
1348 1348 if start is specified, only heads that are descendants of
1349 1349 start will be returned
1350 1350 if stop is specified, it will consider all the revs from stop
1351 1351 as if they had no children
1352 1352 """
1353 1353 if start is None and stop is None:
1354 1354 if not len(self):
1355 1355 return [self.nullid]
1356 1356 return [self.node(r) for r in self.headrevs()]
1357 1357
1358 1358 if start is None:
1359 1359 start = nullrev
1360 1360 else:
1361 1361 start = self.rev(start)
1362 1362
1363 1363 stoprevs = {self.rev(n) for n in stop or []}
1364 1364
1365 1365 revs = dagop.headrevssubset(
1366 1366 self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
1367 1367 )
1368 1368
1369 1369 return [self.node(rev) for rev in revs]
1370 1370
1371 1371 def children(self, node):
1372 1372 """find the children of a given node"""
1373 1373 c = []
1374 1374 p = self.rev(node)
1375 1375 for r in self.revs(start=p + 1):
1376 1376 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1377 1377 if prevs:
1378 1378 for pr in prevs:
1379 1379 if pr == p:
1380 1380 c.append(self.node(r))
1381 1381 elif p == nullrev:
1382 1382 c.append(self.node(r))
1383 1383 return c
1384 1384
1385 1385 def commonancestorsheads(self, a, b):
1386 1386 """calculate all the heads of the common ancestors of nodes a and b"""
1387 1387 a, b = self.rev(a), self.rev(b)
1388 1388 ancs = self._commonancestorsheads(a, b)
1389 1389 return pycompat.maplist(self.node, ancs)
1390 1390
1391 1391 def _commonancestorsheads(self, *revs):
1392 1392 """calculate all the heads of the common ancestors of revs"""
1393 1393 try:
1394 1394 ancs = self.index.commonancestorsheads(*revs)
1395 1395 except (AttributeError, OverflowError): # C implementation failed
1396 1396 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1397 1397 return ancs
1398 1398
1399 1399 def isancestor(self, a, b):
1400 1400 """return True if node a is an ancestor of node b
1401 1401
1402 1402 A revision is considered an ancestor of itself."""
1403 1403 a, b = self.rev(a), self.rev(b)
1404 1404 return self.isancestorrev(a, b)
1405 1405
1406 1406 def isancestorrev(self, a, b):
1407 1407 """return True if revision a is an ancestor of revision b
1408 1408
1409 1409 A revision is considered an ancestor of itself.
1410 1410
1411 1411 The implementation of this is trivial but the use of
1412 1412 reachableroots is not."""
1413 1413 if a == nullrev:
1414 1414 return True
1415 1415 elif a == b:
1416 1416 return True
1417 1417 elif a > b:
1418 1418 return False
1419 1419 return bool(self.reachableroots(a, [b], [a], includepath=False))
1420 1420
1421 1421 def reachableroots(self, minroot, heads, roots, includepath=False):
1422 1422 """return (heads(::(<roots> and <roots>::<heads>)))
1423 1423
1424 1424 If includepath is True, return (<roots>::<heads>)."""
1425 1425 try:
1426 1426 return self.index.reachableroots2(
1427 1427 minroot, heads, roots, includepath
1428 1428 )
1429 1429 except AttributeError:
1430 1430 return dagop._reachablerootspure(
1431 1431 self.parentrevs, minroot, roots, heads, includepath
1432 1432 )
1433 1433
1434 1434 def ancestor(self, a, b):
1435 1435 """calculate the "best" common ancestor of nodes a and b"""
1436 1436
1437 1437 a, b = self.rev(a), self.rev(b)
1438 1438 try:
1439 1439 ancs = self.index.ancestors(a, b)
1440 1440 except (AttributeError, OverflowError):
1441 1441 ancs = ancestor.ancestors(self.parentrevs, a, b)
1442 1442 if ancs:
1443 1443 # choose a consistent winner when there's a tie
1444 1444 return min(map(self.node, ancs))
1445 1445 return self.nullid
1446 1446
1447 1447 def _match(self, id):
1448 1448 if isinstance(id, int):
1449 1449 # rev
1450 1450 return self.node(id)
1451 1451 if len(id) == self.nodeconstants.nodelen:
1452 1452 # possibly a binary node
1453 1453 # odds of a binary node being all hex in ASCII are 1 in 10**25
1454 1454 try:
1455 1455 node = id
1456 1456 self.rev(node) # quick search the index
1457 1457 return node
1458 1458 except error.LookupError:
1459 1459 pass # may be partial hex id
1460 1460 try:
1461 1461 # str(rev)
1462 1462 rev = int(id)
1463 1463 if b"%d" % rev != id:
1464 1464 raise ValueError
1465 1465 if rev < 0:
1466 1466 rev = len(self) + rev
1467 1467 if rev < 0 or rev >= len(self):
1468 1468 raise ValueError
1469 1469 return self.node(rev)
1470 1470 except (ValueError, OverflowError):
1471 1471 pass
1472 1472 if len(id) == 2 * self.nodeconstants.nodelen:
1473 1473 try:
1474 1474 # a full hex nodeid?
1475 1475 node = bin(id)
1476 1476 self.rev(node)
1477 1477 return node
1478 1478 except (TypeError, error.LookupError):
1479 1479 pass
1480 1480
1481 1481 def _partialmatch(self, id):
1482 1482         # we don't care about wdirfilenodeids as they should always be full hashes
1483 1483 maybewdir = self.nodeconstants.wdirhex.startswith(id)
1484 1484 ambiguous = False
1485 1485 try:
1486 1486 partial = self.index.partialmatch(id)
1487 1487 if partial and self.hasnode(partial):
1488 1488 if maybewdir:
1489 1489 # single 'ff...' match in radix tree, ambiguous with wdir
1490 1490 ambiguous = True
1491 1491 else:
1492 1492 return partial
1493 1493 elif maybewdir:
1494 1494 # no 'ff...' match in radix tree, wdir identified
1495 1495 raise error.WdirUnsupported
1496 1496 else:
1497 1497 return None
1498 1498 except error.RevlogError:
1499 1499 # parsers.c radix tree lookup gave multiple matches
1500 1500 # fast path: for unfiltered changelog, radix tree is accurate
1501 1501 if not getattr(self, 'filteredrevs', None):
1502 1502 ambiguous = True
1503 1503 # fall through to slow path that filters hidden revisions
1504 1504 except (AttributeError, ValueError):
1505 1505 # we are pure python, or key was too short to search radix tree
1506 1506 pass
1507 1507 if ambiguous:
1508 1508 raise error.AmbiguousPrefixLookupError(
1509 1509 id, self.display_id, _(b'ambiguous identifier')
1510 1510 )
1511 1511
1512 1512 if id in self._pcache:
1513 1513 return self._pcache[id]
1514 1514
1515 1515 if len(id) <= 40:
1516 1516 try:
1517 1517 # hex(node)[:...]
1518 1518 l = len(id) // 2 # grab an even number of digits
1519 1519 prefix = bin(id[: l * 2])
1520 1520 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1521 1521 nl = [
1522 1522 n for n in nl if hex(n).startswith(id) and self.hasnode(n)
1523 1523 ]
1524 1524 if self.nodeconstants.nullhex.startswith(id):
1525 1525 nl.append(self.nullid)
1526 1526 if len(nl) > 0:
1527 1527 if len(nl) == 1 and not maybewdir:
1528 1528 self._pcache[id] = nl[0]
1529 1529 return nl[0]
1530 1530 raise error.AmbiguousPrefixLookupError(
1531 1531 id, self.display_id, _(b'ambiguous identifier')
1532 1532 )
1533 1533 if maybewdir:
1534 1534 raise error.WdirUnsupported
1535 1535 return None
1536 1536 except TypeError:
1537 1537 pass
1538 1538
1539 1539 def lookup(self, id):
1540 1540 """locate a node based on:
1541 1541 - revision number or str(revision number)
1542 1542 - nodeid or subset of hex nodeid
1543 1543 """
1544 1544 n = self._match(id)
1545 1545 if n is not None:
1546 1546 return n
1547 1547 n = self._partialmatch(id)
1548 1548 if n:
1549 1549 return n
1550 1550
1551 1551 raise error.LookupError(id, self.display_id, _(b'no match found'))
1552 1552
1553 1553 def shortest(self, node, minlength=1):
1554 1554 """Find the shortest unambiguous prefix that matches node."""
1555 1555
1556 1556 def isvalid(prefix):
1557 1557 try:
1558 1558 matchednode = self._partialmatch(prefix)
1559 1559 except error.AmbiguousPrefixLookupError:
1560 1560 return False
1561 1561 except error.WdirUnsupported:
1562 1562 # single 'ff...' match
1563 1563 return True
1564 1564 if matchednode is None:
1565 1565 raise error.LookupError(node, self.display_id, _(b'no node'))
1566 1566 return True
1567 1567
1568 1568 def maybewdir(prefix):
1569 1569 return all(c == b'f' for c in pycompat.iterbytestr(prefix))
1570 1570
1571 1571 hexnode = hex(node)
1572 1572
1573 1573 def disambiguate(hexnode, minlength):
1574 1574 """Disambiguate against wdirid."""
1575 1575 for length in range(minlength, len(hexnode) + 1):
1576 1576 prefix = hexnode[:length]
1577 1577 if not maybewdir(prefix):
1578 1578 return prefix
1579 1579
1580 1580 if not getattr(self, 'filteredrevs', None):
1581 1581 try:
1582 1582 length = max(self.index.shortest(node), minlength)
1583 1583 return disambiguate(hexnode, length)
1584 1584 except error.RevlogError:
1585 1585 if node != self.nodeconstants.wdirid:
1586 1586 raise error.LookupError(
1587 1587 node, self.display_id, _(b'no node')
1588 1588 )
1589 1589 except AttributeError:
1590 1590 # Fall through to pure code
1591 1591 pass
1592 1592
1593 1593 if node == self.nodeconstants.wdirid:
1594 1594 for length in range(minlength, len(hexnode) + 1):
1595 1595 prefix = hexnode[:length]
1596 1596 if isvalid(prefix):
1597 1597 return prefix
1598 1598
1599 1599 for length in range(minlength, len(hexnode) + 1):
1600 1600 prefix = hexnode[:length]
1601 1601 if isvalid(prefix):
1602 1602 return disambiguate(hexnode, length)
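
    # Example (hypothetical): the shortest prefix with at least 4 hex digits
    # that still resolves back to the same node through lookup().
    #
    #   prefix = rl.shortest(node, minlength=4)
    #   assert rl.lookup(prefix) == node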
1603 1603
1604 1604 def cmp(self, node, text):
1605 1605 """compare text with a given file revision
1606 1606
1607 1607         returns True if text is different from what is stored.
1608 1608 """
1609 1609 p1, p2 = self.parents(node)
1610 1610 return storageutil.hashrevisionsha1(text, p1, p2) != node
1611 1611
1612 1612 def _getsegmentforrevs(self, startrev, endrev, df=None):
1613 1613 """Obtain a segment of raw data corresponding to a range of revisions.
1614 1614
1615 1615 Accepts the start and end revisions and an optional already-open
1616 1616 file handle to be used for reading. If the file handle is read, its
1617 1617 seek position will not be preserved.
1618 1618
1619 1619 Requests for data may be satisfied by a cache.
1620 1620
1621 1621 Returns a 2-tuple of (offset, data) for the requested range of
1622 1622 revisions. Offset is the integer offset from the beginning of the
1623 1623 revlog and data is a str or buffer of the raw byte data.
1624 1624
1625 1625 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1626 1626 to determine where each revision's data begins and ends.
1627 1627 """
1628 1628 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1629 1629 # (functions are expensive).
1630 1630 index = self.index
1631 1631 istart = index[startrev]
1632 1632 start = int(istart[0] >> 16)
1633 1633 if startrev == endrev:
1634 1634 end = start + istart[1]
1635 1635 else:
1636 1636 iend = index[endrev]
1637 1637 end = int(iend[0] >> 16) + iend[1]
1638 1638
1639 1639 if self._inline:
1640 1640 start += (startrev + 1) * self.index.entry_size
1641 1641 end += (endrev + 1) * self.index.entry_size
1642 1642 length = end - start
1643 1643
1644 1644 return start, self._segmentfile.read_chunk(start, length, df)
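
    # Sketch (hypothetical, non-inline revlog): slicing one revision's
    # compressed chunk out of the returned segment, as _chunks() below does.
    #
    #   offset, data = rl._getsegmentforrevs(firstrev, lastrev)
    #   chunk = util.buffer(data, rl.start(rev) - offset, rl.length(rev))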
1645 1645
1646 1646 def _chunk(self, rev, df=None):
1647 1647 """Obtain a single decompressed chunk for a revision.
1648 1648
1649 1649 Accepts an integer revision and an optional already-open file handle
1650 1650 to be used for reading. If used, the seek position of the file will not
1651 1651 be preserved.
1652 1652
1653 1653 Returns a str holding uncompressed data for the requested revision.
1654 1654 """
1655 1655 compression_mode = self.index[rev][10]
1656 1656 data = self._getsegmentforrevs(rev, rev, df=df)[1]
1657 1657 if compression_mode == COMP_MODE_PLAIN:
1658 1658 return data
1659 1659 elif compression_mode == COMP_MODE_DEFAULT:
1660 1660 return self._decompressor(data)
1661 1661 elif compression_mode == COMP_MODE_INLINE:
1662 1662 return self.decompress(data)
1663 1663 else:
1664 1664 msg = b'unknown compression mode %d'
1665 1665 msg %= compression_mode
1666 1666 raise error.RevlogError(msg)
1667 1667
1668 1668 def _chunks(self, revs, df=None, targetsize=None):
1669 1669 """Obtain decompressed chunks for the specified revisions.
1670 1670
1671 1671 Accepts an iterable of numeric revisions that are assumed to be in
1672 1672 ascending order. Also accepts an optional already-open file handle
1673 1673 to be used for reading. If used, the seek position of the file will
1674 1674 not be preserved.
1675 1675
1676 1676 This function is similar to calling ``self._chunk()`` multiple times,
1677 1677 but is faster.
1678 1678
1679 1679 Returns a list with decompressed data for each requested revision.
1680 1680 """
1681 1681 if not revs:
1682 1682 return []
1683 1683 start = self.start
1684 1684 length = self.length
1685 1685 inline = self._inline
1686 1686 iosize = self.index.entry_size
1687 1687 buffer = util.buffer
1688 1688
1689 1689 l = []
1690 1690 ladd = l.append
1691 1691
1692 1692 if not self._withsparseread:
1693 1693 slicedchunks = (revs,)
1694 1694 else:
1695 1695 slicedchunks = deltautil.slicechunk(
1696 1696 self, revs, targetsize=targetsize
1697 1697 )
1698 1698
1699 1699 for revschunk in slicedchunks:
1700 1700 firstrev = revschunk[0]
1701 1701 # Skip trailing revisions with empty diff
1702 1702 for lastrev in revschunk[::-1]:
1703 1703 if length(lastrev) != 0:
1704 1704 break
1705 1705
1706 1706 try:
1707 1707 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1708 1708 except OverflowError:
1709 1709 # issue4215 - we can't cache a run of chunks greater than
1710 1710 # 2G on Windows
1711 1711 return [self._chunk(rev, df=df) for rev in revschunk]
1712 1712
1713 1713 decomp = self.decompress
1714 1714 # self._decompressor might be None, but will not be used in that case
1715 1715 def_decomp = self._decompressor
1716 1716 for rev in revschunk:
1717 1717 chunkstart = start(rev)
1718 1718 if inline:
1719 1719 chunkstart += (rev + 1) * iosize
1720 1720 chunklength = length(rev)
1721 1721 comp_mode = self.index[rev][10]
1722 1722 c = buffer(data, chunkstart - offset, chunklength)
1723 1723 if comp_mode == COMP_MODE_PLAIN:
1724 1724 ladd(c)
1725 1725 elif comp_mode == COMP_MODE_INLINE:
1726 1726 ladd(decomp(c))
1727 1727 elif comp_mode == COMP_MODE_DEFAULT:
1728 1728 ladd(def_decomp(c))
1729 1729 else:
1730 1730 msg = b'unknown compression mode %d'
1731 1731 msg %= comp_mode
1732 1732 raise error.RevlogError(msg)
1733 1733
1734 1734 return l
1735 1735
1736 1736 def deltaparent(self, rev):
1737 1737 """return deltaparent of the given revision"""
1738 1738 base = self.index[rev][3]
1739 1739 if base == rev:
1740 1740 return nullrev
1741 1741 elif self._generaldelta:
1742 1742 return base
1743 1743 else:
1744 1744 return rev - 1
1745 1745
1746 1746 def issnapshot(self, rev):
1747 1747 """tells whether rev is a snapshot"""
1748 1748 if not self._sparserevlog:
1749 1749 return self.deltaparent(rev) == nullrev
1750 1750 elif util.safehasattr(self.index, b'issnapshot'):
1751 1751 # directly assign the method to cache the testing and access
1752 1752 self.issnapshot = self.index.issnapshot
1753 1753 return self.issnapshot(rev)
1754 1754 if rev == nullrev:
1755 1755 return True
1756 1756 entry = self.index[rev]
1757 1757 base = entry[3]
1758 1758 if base == rev:
1759 1759 return True
1760 1760 if base == nullrev:
1761 1761 return True
1762 1762 p1 = entry[5]
1763 1763 p2 = entry[6]
1764 1764 if base == p1 or base == p2:
1765 1765 return False
1766 1766 return self.issnapshot(base)
1767 1767
1768 1768 def snapshotdepth(self, rev):
1769 1769         """number of snapshots in the chain before this one"""
1770 1770 if not self.issnapshot(rev):
1771 1771 raise error.ProgrammingError(b'revision %d not a snapshot')
1772 1772 return len(self._deltachain(rev)[0]) - 1
1773 1773
1774 1774 def revdiff(self, rev1, rev2):
1775 1775 """return or calculate a delta between two revisions
1776 1776
1777 1777 The delta calculated is in binary form and is intended to be written to
1778 1778 revlog data directly. So this function needs raw revision data.
1779 1779 """
1780 1780 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1781 1781 return bytes(self._chunk(rev2))
1782 1782
1783 1783 return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))
1784 1784
1785 1785 def _processflags(self, text, flags, operation, raw=False):
1786 1786 """deprecated entry point to access flag processors"""
1787 1787 msg = b'_processflag(...) use the specialized variant'
1788 1788 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1789 1789 if raw:
1790 1790 return text, flagutil.processflagsraw(self, text, flags)
1791 1791 elif operation == b'read':
1792 1792 return flagutil.processflagsread(self, text, flags)
1793 1793 else: # write operation
1794 1794 return flagutil.processflagswrite(self, text, flags)
1795 1795
1796 1796 def revision(self, nodeorrev, _df=None, raw=False):
1797 1797 """return an uncompressed revision of a given node or revision
1798 1798 number.
1799 1799
1800 1800 _df - an existing file handle to read from. (internal-only)
1801 1801 raw - an optional argument specifying if the revision data is to be
1802 1802 treated as raw data when applying flag transforms. 'raw' should be set
1803 1803 to True when generating changegroups or in debug commands.
1804 1804 """
1805 1805 if raw:
1806 1806 msg = (
1807 1807 b'revlog.revision(..., raw=True) is deprecated, '
1808 1808 b'use revlog.rawdata(...)'
1809 1809 )
1810 1810 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1811 1811 return self._revisiondata(nodeorrev, _df, raw=raw)
1812 1812
1813 1813 def sidedata(self, nodeorrev, _df=None):
1814 1814 """a map of extra data related to the changeset but not part of the hash
1815 1815
1816 1816         This function currently returns a dictionary. However, a more advanced
1817 1817         mapping object will likely be used in the future for more
1818 1818         efficient/lazy code.
1819 1819 """
1820 1820 # deal with <nodeorrev> argument type
1821 1821 if isinstance(nodeorrev, int):
1822 1822 rev = nodeorrev
1823 1823 else:
1824 1824 rev = self.rev(nodeorrev)
1825 1825 return self._sidedata(rev)
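
    # Usage sketch (hypothetical): the returned mapping is keyed by the
    # numeric sidedata field constants (the changelog stores copy-tracing
    # data this way, for instance); `some_key` is a placeholder.
    #
    #   sd = rl.sidedata(rev)
    #   blob = sd.get(some_key, b'')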
1826 1826
1827 1827 def _revisiondata(self, nodeorrev, _df=None, raw=False):
1828 1828 # deal with <nodeorrev> argument type
1829 1829 if isinstance(nodeorrev, int):
1830 1830 rev = nodeorrev
1831 1831 node = self.node(rev)
1832 1832 else:
1833 1833 node = nodeorrev
1834 1834 rev = None
1835 1835
1836 1836 # fast path the special `nullid` rev
1837 1837 if node == self.nullid:
1838 1838 return b""
1839 1839
1840 1840 # ``rawtext`` is the text as stored inside the revlog. Might be the
1841 1841 # revision or might need to be processed to retrieve the revision.
1842 1842 rev, rawtext, validated = self._rawtext(node, rev, _df=_df)
1843 1843
1844 1844 if raw and validated:
1845 1845 # if we don't want to process the raw text and the raw
1846 1846 # text is already cached, we can exit early.
1847 1847 return rawtext
1848 1848 if rev is None:
1849 1849 rev = self.rev(node)
1850 1850 # the revlog's flags for this revision
1851 1851 # (usually altering its state or content)
1852 1852 flags = self.flags(rev)
1853 1853
1854 1854 if validated and flags == REVIDX_DEFAULT_FLAGS:
1855 1855 # no extra flags set, no flag processor runs, text = rawtext
1856 1856 return rawtext
1857 1857
1858 1858 if raw:
1859 1859 validatehash = flagutil.processflagsraw(self, rawtext, flags)
1860 1860 text = rawtext
1861 1861 else:
1862 1862 r = flagutil.processflagsread(self, rawtext, flags)
1863 1863 text, validatehash = r
1864 1864 if validatehash:
1865 1865 self.checkhash(text, node, rev=rev)
1866 1866 if not validated:
1867 1867 self._revisioncache = (node, rev, rawtext)
1868 1868
1869 1869 return text
1870 1870
1871 1871 def _rawtext(self, node, rev, _df=None):
1872 1872 """return the possibly unvalidated rawtext for a revision
1873 1873
1874 1874 returns (rev, rawtext, validated)
1875 1875 """
1876 1876
1877 1877 # revision in the cache (could be useful to apply delta)
1878 1878 cachedrev = None
1879 1879 # An intermediate text to apply deltas to
1880 1880 basetext = None
1881 1881
1882 1882 # Check if we have the entry in cache
1883 1883 # The cache entry looks like (node, rev, rawtext)
1884 1884 if self._revisioncache:
1885 1885 if self._revisioncache[0] == node:
1886 1886 return (rev, self._revisioncache[2], True)
1887 1887 cachedrev = self._revisioncache[1]
1888 1888
1889 1889 if rev is None:
1890 1890 rev = self.rev(node)
1891 1891
1892 1892 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1893 1893 if stopped:
1894 1894 basetext = self._revisioncache[2]
1895 1895
1896 1896 # drop cache to save memory, the caller is expected to
1897 1897 # update self._revisioncache after validating the text
1898 1898 self._revisioncache = None
1899 1899
1900 1900 targetsize = None
1901 1901 rawsize = self.index[rev][2]
1902 1902 if 0 <= rawsize:
1903 1903 targetsize = 4 * rawsize
1904 1904
1905 1905 bins = self._chunks(chain, df=_df, targetsize=targetsize)
1906 1906 if basetext is None:
1907 1907 basetext = bytes(bins[0])
1908 1908 bins = bins[1:]
1909 1909
1910 1910 rawtext = mdiff.patches(basetext, bins)
1911 1911 del basetext # let us have a chance to free memory early
1912 1912 return (rev, rawtext, False)
1913 1913
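`_rawtext` above rebuilds a revision by taking the first chunk of the delta chain as a base text and folding the remaining chunks in as patches (`mdiff.patches`). The standalone toy below sketches that chain-folding shape with a simplified `(start, end, replacement)` delta format; it is illustrative only and is not Mercurial's binary patch encoding:

def apply_chain(base, deltas):
    """Fold a chain of simplified deltas into the base text.

    Each delta is a list of (start, end, replacement) edits expressed
    against the previous text in the chain, mirroring how each revlog
    delta applies on top of the text produced by the one before it.
    """
    text = base
    for delta in deltas:
        out = []
        cursor = 0
        for start, end, replacement in delta:
            out.append(text[cursor:start])
            out.append(replacement)
            cursor = end
        out.append(text[cursor:])
        text = b"".join(out)
    return text


# toy chain: a base snapshot followed by two deltas applied in order
base = b"abcdef"
deltas = [
    [(2, 4, b"XY")],  # abcdef -> abXYef
    [(0, 2, b"zz")],  # abXYef -> zzXYef
]
assert apply_chain(base, deltas) == b"zzXYef"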
1914 1914 def _sidedata(self, rev):
1915 1915 """Return the sidedata for a given revision number."""
1916 1916 index_entry = self.index[rev]
1917 1917 sidedata_offset = index_entry[8]
1918 1918 sidedata_size = index_entry[9]
1919 1919
1920 1920 if self._inline:
1921 1921 sidedata_offset += self.index.entry_size * (1 + rev)
1922 1922 if sidedata_size == 0:
1923 1923 return {}
1924 1924
1925 1925 if self._docket.sidedata_end < sidedata_offset + sidedata_size:
1926 1926 filename = self._sidedatafile
1927 1927 end = self._docket.sidedata_end
1928 1928 offset = sidedata_offset
1929 1929 length = sidedata_size
1930 1930 m = FILE_TOO_SHORT_MSG % (filename, length, offset, end)
1931 1931 raise error.RevlogError(m)
1932 1932
1933 1933 comp_segment = self._segmentfile_sidedata.read_chunk(
1934 1934 sidedata_offset, sidedata_size
1935 1935 )
1936 1936
1937 1937 comp = self.index[rev][11]
1938 1938 if comp == COMP_MODE_PLAIN:
1939 1939 segment = comp_segment
1940 1940 elif comp == COMP_MODE_DEFAULT:
1941 1941 segment = self._decompressor(comp_segment)
1942 1942 elif comp == COMP_MODE_INLINE:
1943 1943 segment = self.decompress(comp_segment)
1944 1944 else:
1945 1945 msg = b'unknown compression mode %d'
1946 1946 msg %= comp
1947 1947 raise error.RevlogError(msg)
1948 1948
1949 1949 sidedata = sidedatautil.deserialize_sidedata(segment)
1950 1950 return sidedata
1951 1951
1952 1952 def rawdata(self, nodeorrev, _df=None):
1953 1953 """return the uncompressed raw data of a given node or revision number.
1954 1954
1955 1955 _df - an existing file handle to read from. (internal-only)
1956 1956 """
1957 1957 return self._revisiondata(nodeorrev, _df, raw=True)
1958 1958
1959 1959 def hash(self, text, p1, p2):
1960 1960 """Compute a node hash.
1961 1961
1962 1962 Available as a function so that subclasses can replace the hash
1963 1963 as needed.
1964 1964 """
1965 1965 return storageutil.hashrevisionsha1(text, p1, p2)
1966 1966
1967 1967 def checkhash(self, text, node, p1=None, p2=None, rev=None):
1968 1968 """Check node hash integrity.
1969 1969
1970 1970 Available as a function so that subclasses can extend hash mismatch
1971 1971 behaviors as needed.
1972 1972 """
1973 1973 try:
1974 1974 if p1 is None and p2 is None:
1975 1975 p1, p2 = self.parents(node)
1976 1976 if node != self.hash(text, p1, p2):
1977 1977 # Clear the revision cache on hash failure. The revision cache
1978 1978 # only stores the raw revision and clearing the cache does have
1979 1979 # the side-effect that we won't have a cache hit when the raw
1980 1980 # revision data is accessed. But this case should be rare and
1981 1981 # it is extra work to teach the cache about the hash
1982 1982 # verification state.
1983 1983 if self._revisioncache and self._revisioncache[0] == node:
1984 1984 self._revisioncache = None
1985 1985
1986 1986 revornode = rev
1987 1987 if revornode is None:
1988 1988 revornode = templatefilters.short(hex(node))
1989 1989 raise error.RevlogError(
1990 1990 _(b"integrity check failed on %s:%s")
1991 1991 % (self.display_id, pycompat.bytestr(revornode))
1992 1992 )
1993 1993 except error.RevlogError:
1994 1994 if self._censorable and storageutil.iscensoredtext(text):
1995 1995 raise error.CensoredNodeError(self.display_id, node, text)
1996 1996 raise
1997 1997
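For reference, the default `hash()` verified by `checkhash` is the traditional Mercurial SHA-1 over the two parent node ids (bytewise-smaller parent first) followed by the revision text. A minimal standalone sketch using only `hashlib`, with the 20-byte null parent spelled out explicitly:

import hashlib

NULLID = b"\0" * 20  # 20-byte null node id, as used by SHA-1 revlogs


def node_hash(text, p1=NULLID, p2=NULLID):
    """sha1(min(p1, p2) + max(p1, p2) + text) -> 20-byte node id."""
    a, b = sorted((p1, p2))
    return hashlib.sha1(a + b + text).digest()


# a root revision hashes the text after two null parents
assert node_hash(b"hello") == hashlib.sha1(NULLID * 2 + b"hello").digest()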
1998 1998 def _enforceinlinesize(self, tr):
1999 1999 """Check if the revlog is too big for inline and convert if so.
2000 2000
2001 2001 This should be called after revisions are added to the revlog. If the
2002 2002 revlog has grown too large to be an inline revlog, it will convert it
2003 2003 to use multiple index and data files.
2004 2004 """
2005 2005 tiprev = len(self) - 1
2006 2006 total_size = self.start(tiprev) + self.length(tiprev)
2007 2007 if not self._inline or total_size < _maxinline:
2008 2008 return
2009 2009
2010 2010 troffset = tr.findoffset(self._indexfile)
2011 2011 if troffset is None:
2012 2012 raise error.RevlogError(
2013 2013 _(b"%s not found in the transaction") % self._indexfile
2014 2014 )
2015 2015 trindex = 0
2016 2016 tr.add(self._datafile, 0)
2017 2017
2018 2018 existing_handles = False
2019 2019 if self._writinghandles is not None:
2020 2020 existing_handles = True
2021 2021 fp = self._writinghandles[0]
2022 2022 fp.flush()
2023 2023 fp.close()
2024 2024 # We can't use the cached file handle after close(). So prevent
2025 2025 # its usage.
2026 2026 self._writinghandles = None
2027 2027 self._segmentfile.writing_handle = None
2028 2028 # No need to deal with the sidedata writing handle as it is only
2029 2029 # relevant for revlog-v2, which is never inline, so this code is
2030 2030 # never reached for it
2031 2031
2032 2032 new_dfh = self._datafp(b'w+')
2033 2033 new_dfh.truncate(0) # drop any potentially existing data
2034 2034 try:
2035 2035 with self._indexfp() as read_ifh:
2036 2036 for r in self:
2037 2037 new_dfh.write(self._getsegmentforrevs(r, r, df=read_ifh)[1])
2038 2038 if troffset <= self.start(r) + r * self.index.entry_size:
2039 2039 trindex = r
2040 2040 new_dfh.flush()
2041 2041
2042 2042 with self.__index_new_fp() as fp:
2043 2043 self._format_flags &= ~FLAG_INLINE_DATA
2044 2044 self._inline = False
2045 2045 for i in self:
2046 2046 e = self.index.entry_binary(i)
2047 2047 if i == 0 and self._docket is None:
2048 2048 header = self._format_flags | self._format_version
2049 2049 header = self.index.pack_header(header)
2050 2050 e = header + e
2051 2051 fp.write(e)
2052 2052 if self._docket is not None:
2053 2053 self._docket.index_end = fp.tell()
2054 2054
2055 2055 # There is a small transactional race here. If the rename of
2056 2056 # the index fails, we should remove the datafile. It is more
2057 2057 # important to ensure that the data file is not truncated
2058 2058 # when the index is replaced as otherwise data is lost.
2059 2059 tr.replace(self._datafile, self.start(trindex))
2060 2060
2061 2061 # the temp file replaces the real index when we exit the context
2062 2062 # manager
2063 2063
2064 2064 tr.replace(self._indexfile, trindex * self.index.entry_size)
2065 2065 nodemaputil.setup_persistent_nodemap(tr, self)
2066 2066 self._segmentfile = randomaccessfile.randomaccessfile(
2067 2067 self.opener,
2068 2068 self._datafile,
2069 2069 self._chunkcachesize,
2070 2070 )
2071 2071
2072 2072 if existing_handles:
2073 2073 # switched from inline to conventional; reopen the index
2074 2074 ifh = self.__index_write_fp()
2075 2075 self._writinghandles = (ifh, new_dfh, None)
2076 2076 self._segmentfile.writing_handle = new_dfh
2077 2077 new_dfh = None
2078 2078 # No need to deal with the sidedata writing handle as it is only
2079 2079 # relevant for revlog-v2, which is never inline, so this code is
2080 2080 # never reached for it
2081 2081 finally:
2082 2082 if new_dfh is not None:
2083 2083 new_dfh.close()
2084 2084
2085 2085 def _nodeduplicatecallback(self, transaction, node):
2086 2086 """called when trying to add a node already stored."""
2087 2087
2088 2088 @contextlib.contextmanager
2089 def reading(self):
2090 """Context manager that keeps data and sidedata files open for reading"""
2091 with self._segmentfile.reading():
2092 with self._segmentfile_sidedata.reading():
2093 yield
2094
2095 @contextlib.contextmanager
2089 2096 def _writing(self, transaction):
2090 2097 if self._trypending:
2091 2098 msg = b'try to write in a `trypending` revlog: %s'
2092 2099 msg %= self.display_id
2093 2100 raise error.ProgrammingError(msg)
2094 2101 if self._writinghandles is not None:
2095 2102 yield
2096 2103 else:
2097 2104 ifh = dfh = sdfh = None
2098 2105 try:
2099 2106 r = len(self)
2100 2107 # opening the data file.
2101 2108 dsize = 0
2102 2109 if r:
2103 2110 dsize = self.end(r - 1)
2104 2111 dfh = None
2105 2112 if not self._inline:
2106 2113 try:
2107 2114 dfh = self._datafp(b"r+")
2108 2115 if self._docket is None:
2109 2116 dfh.seek(0, os.SEEK_END)
2110 2117 else:
2111 2118 dfh.seek(self._docket.data_end, os.SEEK_SET)
2112 2119 except IOError as inst:
2113 2120 if inst.errno != errno.ENOENT:
2114 2121 raise
2115 2122 dfh = self._datafp(b"w+")
2116 2123 transaction.add(self._datafile, dsize)
2117 2124 if self._sidedatafile is not None:
2118 2125 try:
2119 2126 sdfh = self.opener(self._sidedatafile, mode=b"r+")
2120 2127 dfh.seek(self._docket.sidedata_end, os.SEEK_SET)
2121 2128 except IOError as inst:
2122 2129 if inst.errno != errno.ENOENT:
2123 2130 raise
2124 2131 sdfh = self.opener(self._sidedatafile, mode=b"w+")
2125 2132 transaction.add(
2126 2133 self._sidedatafile, self._docket.sidedata_end
2127 2134 )
2128 2135
2129 2136 # opening the index file.
2130 2137 isize = r * self.index.entry_size
2131 2138 ifh = self.__index_write_fp()
2132 2139 if self._inline:
2133 2140 transaction.add(self._indexfile, dsize + isize)
2134 2141 else:
2135 2142 transaction.add(self._indexfile, isize)
2136 2143 # expose all file handles for writing.
2137 2144 self._writinghandles = (ifh, dfh, sdfh)
2138 2145 self._segmentfile.writing_handle = ifh if self._inline else dfh
2139 2146 self._segmentfile_sidedata.writing_handle = sdfh
2140 2147 yield
2141 2148 if self._docket is not None:
2142 2149 self._write_docket(transaction)
2143 2150 finally:
2144 2151 self._writinghandles = None
2145 2152 self._segmentfile.writing_handle = None
2146 2153 self._segmentfile_sidedata.writing_handle = None
2147 2154 if dfh is not None:
2148 2155 dfh.close()
2149 2156 if sdfh is not None:
2150 2157 sdfh.close()
2151 2158 # close the index file last to avoid exposing references to
2152 2159 # potentially unflushed data content.
2153 2160 if ifh is not None:
2154 2161 ifh.close()
2155 2162
2156 2163 def _write_docket(self, transaction):
2157 2164 """write the current docket on disk
2158 2165
2159 2166 Exists as a method to help the changelog implement its transaction logic.
2160 2167 
2161 2168 We could also imagine using the same transaction logic for all revlogs,
2162 2169 since dockets are cheap."""
2163 2170 self._docket.write(transaction)
2164 2171
2165 2172 def addrevision(
2166 2173 self,
2167 2174 text,
2168 2175 transaction,
2169 2176 link,
2170 2177 p1,
2171 2178 p2,
2172 2179 cachedelta=None,
2173 2180 node=None,
2174 2181 flags=REVIDX_DEFAULT_FLAGS,
2175 2182 deltacomputer=None,
2176 2183 sidedata=None,
2177 2184 ):
2178 2185 """add a revision to the log
2179 2186
2180 2187 text - the revision data to add
2181 2188 transaction - the transaction object used for rollback
2182 2189 link - the linkrev data to add
2183 2190 p1, p2 - the parent nodeids of the revision
2184 2191 cachedelta - an optional precomputed delta
2185 2192 node - nodeid of revision; typically node is not specified, and it is
2186 2193 computed by default as hash(text, p1, p2); however, subclasses might
2187 2194 use a different hashing method (and override checkhash() in that case)
2188 2195 flags - the known flags to set on the revision
2189 2196 deltacomputer - an optional deltacomputer instance shared between
2190 2197 multiple calls
2191 2198 """
2192 2199 if link == nullrev:
2193 2200 raise error.RevlogError(
2194 2201 _(b"attempted to add linkrev -1 to %s") % self.display_id
2195 2202 )
2196 2203
2197 2204 if sidedata is None:
2198 2205 sidedata = {}
2199 2206 elif sidedata and not self.hassidedata:
2200 2207 raise error.ProgrammingError(
2201 2208 _(b"trying to add sidedata to a revlog that does not support them")
2202 2209 )
2203 2210
2204 2211 if flags:
2205 2212 node = node or self.hash(text, p1, p2)
2206 2213
2207 2214 rawtext, validatehash = flagutil.processflagswrite(self, text, flags)
2208 2215
2209 2216 # If the flag processor modifies the revision data, ignore any provided
2210 2217 # cachedelta.
2211 2218 if rawtext != text:
2212 2219 cachedelta = None
2213 2220
2214 2221 if len(rawtext) > _maxentrysize:
2215 2222 raise error.RevlogError(
2216 2223 _(
2217 2224 b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
2218 2225 )
2219 2226 % (self.display_id, len(rawtext))
2220 2227 )
2221 2228
2222 2229 node = node or self.hash(rawtext, p1, p2)
2223 2230 rev = self.index.get_rev(node)
2224 2231 if rev is not None:
2225 2232 return rev
2226 2233
2227 2234 if validatehash:
2228 2235 self.checkhash(rawtext, node, p1=p1, p2=p2)
2229 2236
2230 2237 return self.addrawrevision(
2231 2238 rawtext,
2232 2239 transaction,
2233 2240 link,
2234 2241 p1,
2235 2242 p2,
2236 2243 node,
2237 2244 flags,
2238 2245 cachedelta=cachedelta,
2239 2246 deltacomputer=deltacomputer,
2240 2247 sidedata=sidedata,
2241 2248 )
2242 2249
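A hedged usage sketch of `addrevision` (all names are illustrative: `rl` is a writable revlog, `tr` an open transaction, `linkrev` the linkrev to record, and `p1`/`p2` the parent node ids):

# with repo.transaction(b'example') as tr:             # hypothetical caller
#     rev = rl.addrevision(text, tr, linkrev, p1, p2)  # returns the new rev number
#     node = rl.node(rev)
# Adding the same (text, p1, p2) again returns the existing revision
# number instead of storing a duplicate entry.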
2243 2250 def addrawrevision(
2244 2251 self,
2245 2252 rawtext,
2246 2253 transaction,
2247 2254 link,
2248 2255 p1,
2249 2256 p2,
2250 2257 node,
2251 2258 flags,
2252 2259 cachedelta=None,
2253 2260 deltacomputer=None,
2254 2261 sidedata=None,
2255 2262 ):
2256 2263 """add a raw revision with known flags, node and parents
2257 2264 useful when reusing a revision not stored in this revlog (ex: received
2258 2265 over wire, or read from an external bundle).
2259 2266 """
2260 2267 with self._writing(transaction):
2261 2268 return self._addrevision(
2262 2269 node,
2263 2270 rawtext,
2264 2271 transaction,
2265 2272 link,
2266 2273 p1,
2267 2274 p2,
2268 2275 flags,
2269 2276 cachedelta,
2270 2277 deltacomputer=deltacomputer,
2271 2278 sidedata=sidedata,
2272 2279 )
2273 2280
2274 2281 def compress(self, data):
2275 2282 """Generate a possibly-compressed representation of data."""
2276 2283 if not data:
2277 2284 return b'', data
2278 2285
2279 2286 compressed = self._compressor.compress(data)
2280 2287
2281 2288 if compressed:
2282 2289 # The revlog compressor added the header in the returned data.
2283 2290 return b'', compressed
2284 2291
2285 2292 if data[0:1] == b'\0':
2286 2293 return b'', data
2287 2294 return b'u', data
2288 2295
2289 2296 def decompress(self, data):
2290 2297 """Decompress a revlog chunk.
2291 2298
2292 2299 The chunk is expected to begin with a header identifying the
2293 2300 format type so it can be routed to an appropriate decompressor.
2294 2301 """
2295 2302 if not data:
2296 2303 return data
2297 2304
2298 2305 # Revlogs are read much more frequently than they are written and many
2299 2306 # chunks only take microseconds to decompress, so performance is
2300 2307 # important here.
2301 2308 #
2302 2309 # We can make a few assumptions about revlogs:
2303 2310 #
2304 2311 # 1) the majority of chunks will be compressed (as opposed to inline
2305 2312 # raw data).
2306 2313 # 2) decompressing *any* data will likely be at least 10x slower than
2307 2314 # returning raw inline data.
2308 2315 # 3) we want to prioritize common and officially supported compression
2309 2316 # engines
2310 2317 #
2311 2318 # It follows that we want to optimize for "decompress compressed data
2312 2319 # when encoded with common and officially supported compression engines"
2313 2320 # case over "raw data" and "data encoded by less common or non-official
2314 2321 # compression engines." That is why we have the inline lookup first
2315 2322 # followed by the compengines lookup.
2316 2323 #
2317 2324 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
2318 2325 # compressed chunks. And this matters for changelog and manifest reads.
2319 2326 t = data[0:1]
2320 2327
2321 2328 if t == b'x':
2322 2329 try:
2323 2330 return _zlibdecompress(data)
2324 2331 except zlib.error as e:
2325 2332 raise error.RevlogError(
2326 2333 _(b'revlog decompress error: %s')
2327 2334 % stringutil.forcebytestr(e)
2328 2335 )
2329 2336 # '\0' is more common than 'u' so it goes first.
2330 2337 elif t == b'\0':
2331 2338 return data
2332 2339 elif t == b'u':
2333 2340 return util.buffer(data, 1)
2334 2341
2335 2342 compressor = self._get_decompressor(t)
2336 2343
2337 2344 return compressor.decompress(data)
2338 2345
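The header-byte dispatch described above can be illustrated with a self-contained sketch of the zlib case: a zlib stream happens to start with b'x', data beginning with b'\0' is stored verbatim, and anything else gets an explicit b'u' (uncompressed) prefix. This is a simplified model of `compress`/`decompress`, not the pluggable compression-engine machinery:

import zlib


def toy_compress(data):
    """Store zlib output when it is smaller, else fall back to raw storage."""
    if not data:
        return data
    comp = zlib.compress(data)
    if len(comp) < len(data):
        return comp  # zlib output starts with b'x'
    if data[0:1] == b'\0':
        return data  # NUL-prefixed data can be stored as-is
    return b'u' + data  # explicit "uncompressed" marker


def toy_decompress(chunk):
    """Route a chunk to the right decoder based on its first byte."""
    if not chunk:
        return chunk
    t = chunk[0:1]
    if t == b'x':
        return zlib.decompress(chunk)
    if t == b'\0':
        return chunk
    if t == b'u':
        return chunk[1:]
    raise ValueError('unknown chunk header %r' % t)


for payload in (b'', b'abc', b'\0raw bytes', b'a' * 4096):
    assert toy_decompress(toy_compress(payload)) == payload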
2339 2346 def _addrevision(
2340 2347 self,
2341 2348 node,
2342 2349 rawtext,
2343 2350 transaction,
2344 2351 link,
2345 2352 p1,
2346 2353 p2,
2347 2354 flags,
2348 2355 cachedelta,
2349 2356 alwayscache=False,
2350 2357 deltacomputer=None,
2351 2358 sidedata=None,
2352 2359 ):
2353 2360 """internal function to add revisions to the log
2354 2361
2355 2362 see addrevision for argument descriptions.
2356 2363
2357 2364 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2358 2365
2359 2366 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2360 2367 be used.
2361 2368
2362 2369 invariants:
2363 2370 - rawtext is optional (can be None); if not set, cachedelta must be set.
2364 2371 if both are set, they must correspond to each other.
2365 2372 """
2366 2373 if node == self.nullid:
2367 2374 raise error.RevlogError(
2368 2375 _(b"%s: attempt to add null revision") % self.display_id
2369 2376 )
2370 2377 if (
2371 2378 node == self.nodeconstants.wdirid
2372 2379 or node in self.nodeconstants.wdirfilenodeids
2373 2380 ):
2374 2381 raise error.RevlogError(
2375 2382 _(b"%s: attempt to add wdir revision") % self.display_id
2376 2383 )
2377 2384 if self._writinghandles is None:
2378 2385 msg = b'adding revision outside `revlog._writing` context'
2379 2386 raise error.ProgrammingError(msg)
2380 2387
2381 2388 if self._inline:
2382 2389 fh = self._writinghandles[0]
2383 2390 else:
2384 2391 fh = self._writinghandles[1]
2385 2392
2386 2393 btext = [rawtext]
2387 2394
2388 2395 curr = len(self)
2389 2396 prev = curr - 1
2390 2397
2391 2398 offset = self._get_data_offset(prev)
2392 2399
2393 2400 if self._concurrencychecker:
2394 2401 ifh, dfh, sdfh = self._writinghandles
2395 2402 # XXX no checking for the sidedata file
2396 2403 if self._inline:
2397 2404 # offset is "as if" it were in the .d file, so we need to add on
2398 2405 # the size of the entry metadata.
2399 2406 self._concurrencychecker(
2400 2407 ifh, self._indexfile, offset + curr * self.index.entry_size
2401 2408 )
2402 2409 else:
2403 2410 # Entries in the .i are a consistent size.
2404 2411 self._concurrencychecker(
2405 2412 ifh, self._indexfile, curr * self.index.entry_size
2406 2413 )
2407 2414 self._concurrencychecker(dfh, self._datafile, offset)
2408 2415
2409 2416 p1r, p2r = self.rev(p1), self.rev(p2)
2410 2417
2411 2418 # full versions are inserted when the needed deltas
2412 2419 # become comparable to the uncompressed text
2413 2420 if rawtext is None:
2414 2421 # need the rawtext size, before it is changed by flag processors, which
2415 2422 # is the non-raw size. use revlog explicitly to avoid filelog's extra
2416 2423 # logic that might remove metadata size.
2417 2424 textlen = mdiff.patchedsize(
2418 2425 revlog.size(self, cachedelta[0]), cachedelta[1]
2419 2426 )
2420 2427 else:
2421 2428 textlen = len(rawtext)
2422 2429
2423 2430 if deltacomputer is None:
2424 2431 deltacomputer = deltautil.deltacomputer(self)
2425 2432
2426 2433 revinfo = revlogutils.revisioninfo(
2427 2434 node,
2428 2435 p1,
2429 2436 p2,
2430 2437 btext,
2431 2438 textlen,
2432 2439 cachedelta,
2433 2440 flags,
2434 2441 )
2435 2442
2436 2443 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2437 2444
2438 2445 compression_mode = COMP_MODE_INLINE
2439 2446 if self._docket is not None:
2440 2447 default_comp = self._docket.default_compression_header
2441 2448 r = deltautil.delta_compression(default_comp, deltainfo)
2442 2449 compression_mode, deltainfo = r
2443 2450
2444 2451 sidedata_compression_mode = COMP_MODE_INLINE
2445 2452 if sidedata and self.hassidedata:
2446 2453 sidedata_compression_mode = COMP_MODE_PLAIN
2447 2454 serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
2448 2455 sidedata_offset = self._docket.sidedata_end
2449 2456 h, comp_sidedata = self.compress(serialized_sidedata)
2450 2457 if (
2451 2458 h != b'u'
2452 2459 and comp_sidedata[0:1] != b'\0'
2453 2460 and len(comp_sidedata) < len(serialized_sidedata)
2454 2461 ):
2455 2462 assert not h
2456 2463 if (
2457 2464 comp_sidedata[0:1]
2458 2465 == self._docket.default_compression_header
2459 2466 ):
2460 2467 sidedata_compression_mode = COMP_MODE_DEFAULT
2461 2468 serialized_sidedata = comp_sidedata
2462 2469 else:
2463 2470 sidedata_compression_mode = COMP_MODE_INLINE
2464 2471 serialized_sidedata = comp_sidedata
2465 2472 else:
2466 2473 serialized_sidedata = b""
2467 2474 # Don't store the offset if the sidedata is empty; that way
2468 2475 # empty sidedata is easy to detect and is no different from
2469 2476 # sidedata we add manually.
2470 2477 sidedata_offset = 0
2471 2478
2472 2479 e = revlogutils.entry(
2473 2480 flags=flags,
2474 2481 data_offset=offset,
2475 2482 data_compressed_length=deltainfo.deltalen,
2476 2483 data_uncompressed_length=textlen,
2477 2484 data_compression_mode=compression_mode,
2478 2485 data_delta_base=deltainfo.base,
2479 2486 link_rev=link,
2480 2487 parent_rev_1=p1r,
2481 2488 parent_rev_2=p2r,
2482 2489 node_id=node,
2483 2490 sidedata_offset=sidedata_offset,
2484 2491 sidedata_compressed_length=len(serialized_sidedata),
2485 2492 sidedata_compression_mode=sidedata_compression_mode,
2486 2493 )
2487 2494
2488 2495 self.index.append(e)
2489 2496 entry = self.index.entry_binary(curr)
2490 2497 if curr == 0 and self._docket is None:
2491 2498 header = self._format_flags | self._format_version
2492 2499 header = self.index.pack_header(header)
2493 2500 entry = header + entry
2494 2501 self._writeentry(
2495 2502 transaction,
2496 2503 entry,
2497 2504 deltainfo.data,
2498 2505 link,
2499 2506 offset,
2500 2507 serialized_sidedata,
2501 2508 sidedata_offset,
2502 2509 )
2503 2510
2504 2511 rawtext = btext[0]
2505 2512
2506 2513 if alwayscache and rawtext is None:
2507 2514 rawtext = deltacomputer.buildtext(revinfo, fh)
2508 2515
2509 2516 if type(rawtext) == bytes: # only accept immutable objects
2510 2517 self._revisioncache = (node, curr, rawtext)
2511 2518 self._chainbasecache[curr] = deltainfo.chainbase
2512 2519 return curr
2513 2520
2514 2521 def _get_data_offset(self, prev):
2515 2522 """Returns the current offset in the (in-transaction) data file.
2516 2523 Versions < 2 of the revlog can get this in O(1); revlog v2 needs a docket
2517 2524 file to store that information: since sidedata can be rewritten to the
2518 2525 end of the data file within a transaction, you can have cases where, for
2519 2526 example, rev `n` does not have sidedata while rev `n - 1` does, leading
2520 2527 to `n - 1`'s sidedata being written after `n`'s data.
2521 2528
2522 2529 TODO cache this in a docket file before getting out of experimental."""
2523 2530 if self._docket is None:
2524 2531 return self.end(prev)
2525 2532 else:
2526 2533 return self._docket.data_end
2527 2534
2528 2535 def _writeentry(
2529 2536 self, transaction, entry, data, link, offset, sidedata, sidedata_offset
2530 2537 ):
2531 2538 # Files opened in a+ mode have inconsistent behavior on various
2532 2539 # platforms. Windows requires that a file positioning call be made
2533 2540 # when the file handle transitions between reads and writes. See
2534 2541 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2535 2542 # platforms, Python or the platform itself can be buggy. Some versions
2536 2543 # of Solaris have been observed to not append at the end of the file
2537 2544 # if the file was seeked to before the end. See issue4943 for more.
2538 2545 #
2539 2546 # We work around this issue by inserting a seek() before writing.
2540 2547 # Note: This is likely not necessary on Python 3. However, because
2541 2548 # the file handle is reused for reads and may be seeked there, we need
2542 2549 # to be careful before changing this.
2543 2550 if self._writinghandles is None:
2544 2551 msg = b'adding revision outside `revlog._writing` context'
2545 2552 raise error.ProgrammingError(msg)
2546 2553 ifh, dfh, sdfh = self._writinghandles
2547 2554 if self._docket is None:
2548 2555 ifh.seek(0, os.SEEK_END)
2549 2556 else:
2550 2557 ifh.seek(self._docket.index_end, os.SEEK_SET)
2551 2558 if dfh:
2552 2559 if self._docket is None:
2553 2560 dfh.seek(0, os.SEEK_END)
2554 2561 else:
2555 2562 dfh.seek(self._docket.data_end, os.SEEK_SET)
2556 2563 if sdfh:
2557 2564 sdfh.seek(self._docket.sidedata_end, os.SEEK_SET)
2558 2565
2559 2566 curr = len(self) - 1
2560 2567 if not self._inline:
2561 2568 transaction.add(self._datafile, offset)
2562 2569 if self._sidedatafile:
2563 2570 transaction.add(self._sidedatafile, sidedata_offset)
2564 2571 transaction.add(self._indexfile, curr * len(entry))
2565 2572 if data[0]:
2566 2573 dfh.write(data[0])
2567 2574 dfh.write(data[1])
2568 2575 if sidedata:
2569 2576 sdfh.write(sidedata)
2570 2577 ifh.write(entry)
2571 2578 else:
2572 2579 offset += curr * self.index.entry_size
2573 2580 transaction.add(self._indexfile, offset)
2574 2581 ifh.write(entry)
2575 2582 ifh.write(data[0])
2576 2583 ifh.write(data[1])
2577 2584 assert not sidedata
2578 2585 self._enforceinlinesize(transaction)
2579 2586 if self._docket is not None:
2580 2587 self._docket.index_end = self._writinghandles[0].tell()
2581 2588 self._docket.data_end = self._writinghandles[1].tell()
2582 2589 self._docket.sidedata_end = self._writinghandles[2].tell()
2583 2590
2584 2591 nodemaputil.setup_persistent_nodemap(transaction, self)
2585 2592
2586 2593 def addgroup(
2587 2594 self,
2588 2595 deltas,
2589 2596 linkmapper,
2590 2597 transaction,
2591 2598 alwayscache=False,
2592 2599 addrevisioncb=None,
2593 2600 duplicaterevisioncb=None,
2594 2601 ):
2595 2602 """
2596 2603 add a delta group
2597 2604
2598 2605 given a set of deltas, add them to the revision log. the
2599 2606 first delta is against its parent, which should be in our
2600 2607 log, the rest are against the previous delta.
2601 2608
2602 2609 If ``addrevisioncb`` is defined, it will be called with arguments of
2603 2610 this revlog and the node that was added.
2604 2611 """
2605 2612
2606 2613 if self._adding_group:
2607 2614 raise error.ProgrammingError(b'cannot nest addgroup() calls')
2608 2615
2609 2616 self._adding_group = True
2610 2617 empty = True
2611 2618 try:
2612 2619 with self._writing(transaction):
2613 2620 deltacomputer = deltautil.deltacomputer(self)
2614 2621 # loop through our set of deltas
2615 2622 for data in deltas:
2616 2623 (
2617 2624 node,
2618 2625 p1,
2619 2626 p2,
2620 2627 linknode,
2621 2628 deltabase,
2622 2629 delta,
2623 2630 flags,
2624 2631 sidedata,
2625 2632 ) = data
2626 2633 link = linkmapper(linknode)
2627 2634 flags = flags or REVIDX_DEFAULT_FLAGS
2628 2635
2629 2636 rev = self.index.get_rev(node)
2630 2637 if rev is not None:
2631 2638 # this can happen if two branches make the same change
2632 2639 self._nodeduplicatecallback(transaction, rev)
2633 2640 if duplicaterevisioncb:
2634 2641 duplicaterevisioncb(self, rev)
2635 2642 empty = False
2636 2643 continue
2637 2644
2638 2645 for p in (p1, p2):
2639 2646 if not self.index.has_node(p):
2640 2647 raise error.LookupError(
2641 2648 p, self.radix, _(b'unknown parent')
2642 2649 )
2643 2650
2644 2651 if not self.index.has_node(deltabase):
2645 2652 raise error.LookupError(
2646 2653 deltabase, self.display_id, _(b'unknown delta base')
2647 2654 )
2648 2655
2649 2656 baserev = self.rev(deltabase)
2650 2657
2651 2658 if baserev != nullrev and self.iscensored(baserev):
2652 2659 # if base is censored, delta must be full replacement in a
2653 2660 # single patch operation
2654 2661 hlen = struct.calcsize(b">lll")
2655 2662 oldlen = self.rawsize(baserev)
2656 2663 newlen = len(delta) - hlen
2657 2664 if delta[:hlen] != mdiff.replacediffheader(
2658 2665 oldlen, newlen
2659 2666 ):
2660 2667 raise error.CensoredBaseError(
2661 2668 self.display_id, self.node(baserev)
2662 2669 )
2663 2670
2664 2671 if not flags and self._peek_iscensored(baserev, delta):
2665 2672 flags |= REVIDX_ISCENSORED
2666 2673
2667 2674 # We assume consumers of addrevisioncb will want to retrieve
2668 2675 # the added revision, which will require a call to
2669 2676 # revision(). revision() will fast path if there is a cache
2670 2677 # hit. So, we tell _addrevision() to always cache in this case.
2671 2678 # We're only using addgroup() in the context of changegroup
2672 2679 # generation so the revision data can always be handled as raw
2673 2680 # by the flagprocessor.
2674 2681 rev = self._addrevision(
2675 2682 node,
2676 2683 None,
2677 2684 transaction,
2678 2685 link,
2679 2686 p1,
2680 2687 p2,
2681 2688 flags,
2682 2689 (baserev, delta),
2683 2690 alwayscache=alwayscache,
2684 2691 deltacomputer=deltacomputer,
2685 2692 sidedata=sidedata,
2686 2693 )
2687 2694
2688 2695 if addrevisioncb:
2689 2696 addrevisioncb(self, rev)
2690 2697 empty = False
2691 2698 finally:
2692 2699 self._adding_group = False
2693 2700 return not empty
2694 2701
2695 2702 def iscensored(self, rev):
2696 2703 """Check if a file revision is censored."""
2697 2704 if not self._censorable:
2698 2705 return False
2699 2706
2700 2707 return self.flags(rev) & REVIDX_ISCENSORED
2701 2708
2702 2709 def _peek_iscensored(self, baserev, delta):
2703 2710 """Quickly check if a delta produces a censored revision."""
2704 2711 if not self._censorable:
2705 2712 return False
2706 2713
2707 2714 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
2708 2715
2709 2716 def getstrippoint(self, minlink):
2710 2717 """find the minimum rev that must be stripped to strip the linkrev
2711 2718
2712 2719 Returns a tuple containing the minimum rev and a set of all revs that
2713 2720 have linkrevs that will be broken by this strip.
2714 2721 """
2715 2722 return storageutil.resolvestripinfo(
2716 2723 minlink,
2717 2724 len(self) - 1,
2718 2725 self.headrevs(),
2719 2726 self.linkrev,
2720 2727 self.parentrevs,
2721 2728 )
2722 2729
2723 2730 def strip(self, minlink, transaction):
2724 2731 """truncate the revlog on the first revision with a linkrev >= minlink
2725 2732
2726 2733 This function is called when we're stripping revision minlink and
2727 2734 its descendants from the repository.
2728 2735
2729 2736 We have to remove all revisions with linkrev >= minlink, because
2730 2737 the equivalent changelog revisions will be renumbered after the
2731 2738 strip.
2732 2739
2733 2740 So we truncate the revlog on the first of these revisions, and
2734 2741 trust that the caller has saved the revisions that shouldn't be
2735 2742 removed and that it'll re-add them after this truncation.
2736 2743 """
2737 2744 if len(self) == 0:
2738 2745 return
2739 2746
2740 2747 rev, _ = self.getstrippoint(minlink)
2741 2748 if rev == len(self):
2742 2749 return
2743 2750
2744 2751 # first truncate the files on disk
2745 2752 data_end = self.start(rev)
2746 2753 if not self._inline:
2747 2754 transaction.add(self._datafile, data_end)
2748 2755 end = rev * self.index.entry_size
2749 2756 else:
2750 2757 end = data_end + (rev * self.index.entry_size)
2751 2758
2752 2759 if self._sidedatafile:
2753 2760 sidedata_end = self.sidedata_cut_off(rev)
2754 2761 transaction.add(self._sidedatafile, sidedata_end)
2755 2762
2756 2763 transaction.add(self._indexfile, end)
2757 2764 if self._docket is not None:
2758 2765 # XXX we could leverage the docket while stripping. However, it is
2759 2766 # not powerful enough at the time of this comment
2760 2767 self._docket.index_end = end
2761 2768 self._docket.data_end = data_end
2762 2769 self._docket.sidedata_end = sidedata_end
2763 2770 self._docket.write(transaction, stripping=True)
2764 2771
2765 2772 # then reset internal state in memory to forget those revisions
2766 2773 self._revisioncache = None
2767 2774 self._chaininfocache = util.lrucachedict(500)
2768 2775 self._segmentfile.clear_cache()
2769 2776 self._segmentfile_sidedata.clear_cache()
2770 2777
2771 2778 del self.index[rev:-1]
2772 2779
2773 2780 def checksize(self):
2774 2781 """Check size of index and data files
2775 2782
2776 2783 return a (dd, di) tuple.
2777 2784 - dd: extra bytes for the "data" file
2778 2785 - di: extra bytes for the "index" file
2779 2786
2780 2787 A healthy revlog will return (0, 0).
2781 2788 """
2782 2789 expected = 0
2783 2790 if len(self):
2784 2791 expected = max(0, self.end(len(self) - 1))
2785 2792
2786 2793 try:
2787 2794 with self._datafp() as f:
2788 2795 f.seek(0, io.SEEK_END)
2789 2796 actual = f.tell()
2790 2797 dd = actual - expected
2791 2798 except IOError as inst:
2792 2799 if inst.errno != errno.ENOENT:
2793 2800 raise
2794 2801 dd = 0
2795 2802
2796 2803 try:
2797 2804 f = self.opener(self._indexfile)
2798 2805 f.seek(0, io.SEEK_END)
2799 2806 actual = f.tell()
2800 2807 f.close()
2801 2808 s = self.index.entry_size
2802 2809 i = max(0, actual // s)
2803 2810 di = actual - (i * s)
2804 2811 if self._inline:
2805 2812 databytes = 0
2806 2813 for r in self:
2807 2814 databytes += max(0, self.length(r))
2808 2815 dd = 0
2809 2816 di = actual - len(self) * s - databytes
2810 2817 except IOError as inst:
2811 2818 if inst.errno != errno.ENOENT:
2812 2819 raise
2813 2820 di = 0
2814 2821
2815 2822 return (dd, di)
2816 2823
2817 2824 def files(self):
2818 2825 res = [self._indexfile]
2819 2826 if self._docket_file is None:
2820 2827 if not self._inline:
2821 2828 res.append(self._datafile)
2822 2829 else:
2823 2830 res.append(self._docket_file)
2824 2831 res.extend(self._docket.old_index_filepaths(include_empty=False))
2825 2832 if self._docket.data_end:
2826 2833 res.append(self._datafile)
2827 2834 res.extend(self._docket.old_data_filepaths(include_empty=False))
2828 2835 if self._docket.sidedata_end:
2829 2836 res.append(self._sidedatafile)
2830 2837 res.extend(self._docket.old_sidedata_filepaths(include_empty=False))
2831 2838 return res
2832 2839
2833 2840 def emitrevisions(
2834 2841 self,
2835 2842 nodes,
2836 2843 nodesorder=None,
2837 2844 revisiondata=False,
2838 2845 assumehaveparentrevisions=False,
2839 2846 deltamode=repository.CG_DELTAMODE_STD,
2840 2847 sidedata_helpers=None,
2841 2848 ):
2842 2849 if nodesorder not in (b'nodes', b'storage', b'linear', None):
2843 2850 raise error.ProgrammingError(
2844 2851 b'unhandled value for nodesorder: %s' % nodesorder
2845 2852 )
2846 2853
2847 2854 if nodesorder is None and not self._generaldelta:
2848 2855 nodesorder = b'storage'
2849 2856
2850 2857 if (
2851 2858 not self._storedeltachains
2852 2859 and deltamode != repository.CG_DELTAMODE_PREV
2853 2860 ):
2854 2861 deltamode = repository.CG_DELTAMODE_FULL
2855 2862
2856 2863 return storageutil.emitrevisions(
2857 2864 self,
2858 2865 nodes,
2859 2866 nodesorder,
2860 2867 revlogrevisiondelta,
2861 2868 deltaparentfn=self.deltaparent,
2862 2869 candeltafn=self.candelta,
2863 2870 rawsizefn=self.rawsize,
2864 2871 revdifffn=self.revdiff,
2865 2872 flagsfn=self.flags,
2866 2873 deltamode=deltamode,
2867 2874 revisiondata=revisiondata,
2868 2875 assumehaveparentrevisions=assumehaveparentrevisions,
2869 2876 sidedata_helpers=sidedata_helpers,
2870 2877 )
2871 2878
2872 2879 DELTAREUSEALWAYS = b'always'
2873 2880 DELTAREUSESAMEREVS = b'samerevs'
2874 2881 DELTAREUSENEVER = b'never'
2875 2882
2876 2883 DELTAREUSEFULLADD = b'fulladd'
2877 2884
2878 2885 DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}
2879 2886
2880 2887 def clone(
2881 2888 self,
2882 2889 tr,
2883 2890 destrevlog,
2884 2891 addrevisioncb=None,
2885 2892 deltareuse=DELTAREUSESAMEREVS,
2886 2893 forcedeltabothparents=None,
2887 2894 sidedata_helpers=None,
2888 2895 ):
2889 2896 """Copy this revlog to another, possibly with format changes.
2890 2897
2891 2898 The destination revlog will contain the same revisions and nodes.
2892 2899 However, it may not be bit-for-bit identical due to e.g. delta encoding
2893 2900 differences.
2894 2901
2895 2902 The ``deltareuse`` argument controls how deltas from the existing revlog
2896 2903 are preserved in the destination revlog. The argument can have the
2897 2904 following values:
2898 2905
2899 2906 DELTAREUSEALWAYS
2900 2907 Deltas will always be reused (if possible), even if the destination
2901 2908 revlog would not select the same revisions for the delta. This is the
2902 2909 fastest mode of operation.
2903 2910 DELTAREUSESAMEREVS
2904 2911 Deltas will be reused if the destination revlog would pick the same
2905 2912 revisions for the delta. This mode strikes a balance between speed
2906 2913 and optimization.
2907 2914 DELTAREUSENEVER
2908 2915 Deltas will never be reused. This is the slowest mode of execution.
2909 2916 This mode can be used to recompute deltas (e.g. if the diff/delta
2910 2917 algorithm changes).
2911 2918 DELTAREUSEFULLADD
2912 2919 Revisions will be re-added as if they were new content. This is
2913 2920 slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
2914 2921 e.g. large file detection and handling.
2915 2922
2916 2923 Delta computation can be slow, so the choice of delta reuse policy can
2917 2924 significantly affect run time.
2918 2925
2919 2926 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2920 2927 two extremes. Deltas will be reused if they are appropriate. But if the
2921 2928 delta could choose a better revision, it will do so. This means if you
2922 2929 are converting a non-generaldelta revlog to a generaldelta revlog,
2923 2930 deltas will be recomputed if the delta's parent isn't a parent of the
2924 2931 revision.
2925 2932
2926 2933 In addition to the delta policy, the ``forcedeltabothparents``
2927 2934 argument controls whether to force computing deltas against both parents
2928 2935 for merges. By default, the destination revlog's existing setting is kept.
2929 2936
2930 2937 See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
2931 2938 `sidedata_helpers`.
2932 2939 """
2933 2940 if deltareuse not in self.DELTAREUSEALL:
2934 2941 raise ValueError(
2935 2942 _(b'value for deltareuse invalid: %s') % deltareuse
2936 2943 )
2937 2944
2938 2945 if len(destrevlog):
2939 2946 raise ValueError(_(b'destination revlog is not empty'))
2940 2947
2941 2948 if getattr(self, 'filteredrevs', None):
2942 2949 raise ValueError(_(b'source revlog has filtered revisions'))
2943 2950 if getattr(destrevlog, 'filteredrevs', None):
2944 2951 raise ValueError(_(b'destination revlog has filtered revisions'))
2945 2952
2946 2953 # lazydelta and lazydeltabase control whether to reuse a cached delta,
2947 2954 # if possible.
2948 2955 oldlazydelta = destrevlog._lazydelta
2949 2956 oldlazydeltabase = destrevlog._lazydeltabase
2950 2957 oldamd = destrevlog._deltabothparents
2951 2958
2952 2959 try:
2953 2960 if deltareuse == self.DELTAREUSEALWAYS:
2954 2961 destrevlog._lazydeltabase = True
2955 2962 destrevlog._lazydelta = True
2956 2963 elif deltareuse == self.DELTAREUSESAMEREVS:
2957 2964 destrevlog._lazydeltabase = False
2958 2965 destrevlog._lazydelta = True
2959 2966 elif deltareuse == self.DELTAREUSENEVER:
2960 2967 destrevlog._lazydeltabase = False
2961 2968 destrevlog._lazydelta = False
2962 2969
2963 2970 destrevlog._deltabothparents = forcedeltabothparents or oldamd
2964 2971
2965 2972 self._clone(
2966 2973 tr,
2967 2974 destrevlog,
2968 2975 addrevisioncb,
2969 2976 deltareuse,
2970 2977 forcedeltabothparents,
2971 2978 sidedata_helpers,
2972 2979 )
2973 2980
2974 2981 finally:
2975 2982 destrevlog._lazydelta = oldlazydelta
2976 2983 destrevlog._lazydeltabase = oldlazydeltabase
2977 2984 destrevlog._deltabothparents = oldamd
2978 2985
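A hedged usage sketch of the `deltareuse` policies documented above (assuming `src` and `dest` are already-opened revlogs over the same data and `tr` is an active transaction; all names are illustrative):

# Default: reuse a delta only when the destination would pick the same base.
# src.clone(tr, dest, deltareuse=src.DELTAREUSESAMEREVS)
#
# Recompute every delta, e.g. after changing the delta algorithm:
# src.clone(tr, dest, deltareuse=src.DELTAREUSENEVER)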
2979 2986 def _clone(
2980 2987 self,
2981 2988 tr,
2982 2989 destrevlog,
2983 2990 addrevisioncb,
2984 2991 deltareuse,
2985 2992 forcedeltabothparents,
2986 2993 sidedata_helpers,
2987 2994 ):
2988 2995 """perform the core duty of `revlog.clone` after parameter processing"""
2989 2996 deltacomputer = deltautil.deltacomputer(destrevlog)
2990 2997 index = self.index
2991 2998 for rev in self:
2992 2999 entry = index[rev]
2993 3000
2994 3001 # Some classes override linkrev to take filtered revs into
2995 3002 # account. Use raw entry from index.
2996 3003 flags = entry[0] & 0xFFFF
2997 3004 linkrev = entry[4]
2998 3005 p1 = index[entry[5]][7]
2999 3006 p2 = index[entry[6]][7]
3000 3007 node = entry[7]
3001 3008
3002 3009 # (Possibly) reuse the delta from the revlog if allowed and
3003 3010 # the revlog chunk is a delta.
3004 3011 cachedelta = None
3005 3012 rawtext = None
3006 3013 if deltareuse == self.DELTAREUSEFULLADD:
3007 3014 text = self._revisiondata(rev)
3008 3015 sidedata = self.sidedata(rev)
3009 3016
3010 3017 if sidedata_helpers is not None:
3011 3018 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
3012 3019 self, sidedata_helpers, sidedata, rev
3013 3020 )
3014 3021 flags = flags | new_flags[0] & ~new_flags[1]
3015 3022
3016 3023 destrevlog.addrevision(
3017 3024 text,
3018 3025 tr,
3019 3026 linkrev,
3020 3027 p1,
3021 3028 p2,
3022 3029 cachedelta=cachedelta,
3023 3030 node=node,
3024 3031 flags=flags,
3025 3032 deltacomputer=deltacomputer,
3026 3033 sidedata=sidedata,
3027 3034 )
3028 3035 else:
3029 3036 if destrevlog._lazydelta:
3030 3037 dp = self.deltaparent(rev)
3031 3038 if dp != nullrev:
3032 3039 cachedelta = (dp, bytes(self._chunk(rev)))
3033 3040
3034 3041 sidedata = None
3035 3042 if not cachedelta:
3036 3043 rawtext = self._revisiondata(rev)
3037 3044 sidedata = self.sidedata(rev)
3038 3045 if sidedata is None:
3039 3046 sidedata = self.sidedata(rev)
3040 3047
3041 3048 if sidedata_helpers is not None:
3042 3049 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
3043 3050 self, sidedata_helpers, sidedata, rev
3044 3051 )
3045 3052 flags = flags | new_flags[0] & ~new_flags[1]
3046 3053
3047 3054 with destrevlog._writing(tr):
3048 3055 destrevlog._addrevision(
3049 3056 node,
3050 3057 rawtext,
3051 3058 tr,
3052 3059 linkrev,
3053 3060 p1,
3054 3061 p2,
3055 3062 flags,
3056 3063 cachedelta,
3057 3064 deltacomputer=deltacomputer,
3058 3065 sidedata=sidedata,
3059 3066 )
3060 3067
3061 3068 if addrevisioncb:
3062 3069 addrevisioncb(self, rev, node)
3063 3070
3064 3071 def censorrevision(self, tr, censornode, tombstone=b''):
3065 3072 if self._format_version == REVLOGV0:
3066 3073 raise error.RevlogError(
3067 3074 _(b'cannot censor with version %d revlogs')
3068 3075 % self._format_version
3069 3076 )
3070 3077 elif self._format_version == REVLOGV1:
3071 3078 censor.v1_censor(self, tr, censornode, tombstone)
3072 3079 else:
3073 3080 censor.v2_censor(self, tr, censornode, tombstone)
3074 3081
3075 3082 def verifyintegrity(self, state):
3076 3083 """Verifies the integrity of the revlog.
3077 3084
3078 3085 Yields ``revlogproblem`` instances describing problems that are
3079 3086 found.
3080 3087 """
3081 3088 dd, di = self.checksize()
3082 3089 if dd:
3083 3090 yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
3084 3091 if di:
3085 3092 yield revlogproblem(error=_(b'index contains %d extra bytes') % di)
3086 3093
3087 3094 version = self._format_version
3088 3095
3089 3096 # The verifier tells us what version revlog we should be.
3090 3097 if version != state[b'expectedversion']:
3091 3098 yield revlogproblem(
3092 3099 warning=_(b"warning: '%s' uses revlog format %d; expected %d")
3093 3100 % (self.display_id, version, state[b'expectedversion'])
3094 3101 )
3095 3102
3096 3103 state[b'skipread'] = set()
3097 3104 state[b'safe_renamed'] = set()
3098 3105
3099 3106 for rev in self:
3100 3107 node = self.node(rev)
3101 3108
3102 3109 # Verify contents. 4 cases to care about:
3103 3110 #
3104 3111 # common: the most common case
3105 3112 # rename: with a rename
3106 3113 # meta: file content starts with b'\1\n', the metadata
3107 3114 # header defined in filelog.py, but without a rename
3108 3115 # ext: content stored externally
3109 3116 #
3110 3117 # More formally, their differences are shown below:
3111 3118 #
3112 3119 # | common | rename | meta | ext
3113 3120 # -------------------------------------------------------
3114 3121 # flags() | 0 | 0 | 0 | not 0
3115 3122 # renamed() | False | True | False | ?
3116 3123 # rawtext[0:2]=='\1\n'| False | True | True | ?
3117 3124 #
3118 3125 # "rawtext" means the raw text stored in revlog data, which
3119 3126 # could be retrieved by "rawdata(rev)". "text"
3120 3127 # mentioned below is "revision(rev)".
3121 3128 #
3122 3129 # There are 3 different lengths stored physically:
3123 3130 # 1. L1: rawsize, stored in revlog index
3124 3131 # 2. L2: len(rawtext), stored in revlog data
3125 3132 # 3. L3: len(text), stored in revlog data if flags==0, or
3126 3133 # possibly somewhere else if flags!=0
3127 3134 #
3128 3135 # L1 should be equal to L2. L3 could be different from them.
3129 3136 # "text" may or may not affect commit hash depending on flag
3130 3137 # processors (see flagutil.addflagprocessor).
3131 3138 #
3132 3139 # | common | rename | meta | ext
3133 3140 # -------------------------------------------------
3134 3141 # rawsize() | L1 | L1 | L1 | L1
3135 3142 # size() | L1 | L2-LM | L1(*) | L1 (?)
3136 3143 # len(rawtext) | L2 | L2 | L2 | L2
3137 3144 # len(text) | L2 | L2 | L2 | L3
3138 3145 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
3139 3146 #
3140 3147 # LM: length of metadata, depending on rawtext
3141 3148 # (*): not ideal, see comment in filelog.size
3142 3149 # (?): could be "- len(meta)" if the resolved content has
3143 3150 # rename metadata
3144 3151 #
3145 3152 # Checks needed to be done:
3146 3153 # 1. length check: L1 == L2, in all cases.
3147 3154 # 2. hash check: depending on flag processor, we may need to
3148 3155 # use either "text" (external), or "rawtext" (in revlog).
3149 3156
3150 3157 try:
3151 3158 skipflags = state.get(b'skipflags', 0)
3152 3159 if skipflags:
3153 3160 skipflags &= self.flags(rev)
3154 3161
3155 3162 _verify_revision(self, skipflags, state, node)
3156 3163
3157 3164 l1 = self.rawsize(rev)
3158 3165 l2 = len(self.rawdata(node))
3159 3166
3160 3167 if l1 != l2:
3161 3168 yield revlogproblem(
3162 3169 error=_(b'unpacked size is %d, %d expected') % (l2, l1),
3163 3170 node=node,
3164 3171 )
3165 3172
3166 3173 except error.CensoredNodeError:
3167 3174 if state[b'erroroncensored']:
3168 3175 yield revlogproblem(
3169 3176 error=_(b'censored file data'), node=node
3170 3177 )
3171 3178 state[b'skipread'].add(node)
3172 3179 except Exception as e:
3173 3180 yield revlogproblem(
3174 3181 error=_(b'unpacking %s: %s')
3175 3182 % (short(node), stringutil.forcebytestr(e)),
3176 3183 node=node,
3177 3184 )
3178 3185 state[b'skipread'].add(node)
3179 3186
3180 3187 def storageinfo(
3181 3188 self,
3182 3189 exclusivefiles=False,
3183 3190 sharedfiles=False,
3184 3191 revisionscount=False,
3185 3192 trackedsize=False,
3186 3193 storedsize=False,
3187 3194 ):
3188 3195 d = {}
3189 3196
3190 3197 if exclusivefiles:
3191 3198 d[b'exclusivefiles'] = [(self.opener, self._indexfile)]
3192 3199 if not self._inline:
3193 3200 d[b'exclusivefiles'].append((self.opener, self._datafile))
3194 3201
3195 3202 if sharedfiles:
3196 3203 d[b'sharedfiles'] = []
3197 3204
3198 3205 if revisionscount:
3199 3206 d[b'revisionscount'] = len(self)
3200 3207
3201 3208 if trackedsize:
3202 3209 d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))
3203 3210
3204 3211 if storedsize:
3205 3212 d[b'storedsize'] = sum(
3206 3213 self.opener.stat(path).st_size for path in self.files()
3207 3214 )
3208 3215
3209 3216 return d
3210 3217
3211 3218 def rewrite_sidedata(self, transaction, helpers, startrev, endrev):
3212 3219 if not self.hassidedata:
3213 3220 return
3214 3221 # revlog formats with sidedata support do not support inline
3215 3222 assert not self._inline
3216 3223 if not helpers[1] and not helpers[2]:
3217 3224 # Nothing to generate or remove
3218 3225 return
3219 3226
3220 3227 new_entries = []
3221 3228 # append the new sidedata
3222 3229 with self._writing(transaction):
3223 3230 ifh, dfh, sdfh = self._writinghandles
3224 3231 dfh.seek(self._docket.sidedata_end, os.SEEK_SET)
3225 3232
3226 3233 current_offset = sdfh.tell()
3227 3234 for rev in range(startrev, endrev + 1):
3228 3235 entry = self.index[rev]
3229 3236 new_sidedata, flags = sidedatautil.run_sidedata_helpers(
3230 3237 store=self,
3231 3238 sidedata_helpers=helpers,
3232 3239 sidedata={},
3233 3240 rev=rev,
3234 3241 )
3235 3242
3236 3243 serialized_sidedata = sidedatautil.serialize_sidedata(
3237 3244 new_sidedata
3238 3245 )
3239 3246
3240 3247 sidedata_compression_mode = COMP_MODE_INLINE
3241 3248 if serialized_sidedata and self.hassidedata:
3242 3249 sidedata_compression_mode = COMP_MODE_PLAIN
3243 3250 h, comp_sidedata = self.compress(serialized_sidedata)
3244 3251 if (
3245 3252 h != b'u'
3246 3253 and comp_sidedata[0:1] != b'\0'
3247 3254 and len(comp_sidedata) < len(serialized_sidedata)
3248 3255 ):
3249 3256 assert not h
3250 3257 if (
3251 3258 comp_sidedata[0:1]
3252 3259 == self._docket.default_compression_header
3253 3260 ):
3254 3261 sidedata_compression_mode = COMP_MODE_DEFAULT
3255 3262 serialized_sidedata = comp_sidedata
3256 3263 else:
3257 3264 sidedata_compression_mode = COMP_MODE_INLINE
3258 3265 serialized_sidedata = comp_sidedata
3259 3266 if entry[8] != 0 or entry[9] != 0:
3260 3267 # rewriting entries that already have sidedata is not
3261 3268 # supported yet, because it introduces garbage data in the
3262 3269 # revlog.
3263 3270 msg = b"rewriting existing sidedata is not supported yet"
3264 3271 raise error.Abort(msg)
3265 3272
3266 3273 # Apply (potential) flags to add and to remove after running
3267 3274 # the sidedata helpers
3268 3275 new_offset_flags = entry[0] | flags[0] & ~flags[1]
3269 3276 entry_update = (
3270 3277 current_offset,
3271 3278 len(serialized_sidedata),
3272 3279 new_offset_flags,
3273 3280 sidedata_compression_mode,
3274 3281 )
3275 3282
3276 3283 # the sidedata computation might have moved the file cursors around
3277 3284 sdfh.seek(current_offset, os.SEEK_SET)
3278 3285 sdfh.write(serialized_sidedata)
3279 3286 new_entries.append(entry_update)
3280 3287 current_offset += len(serialized_sidedata)
3281 3288 self._docket.sidedata_end = sdfh.tell()
3282 3289
3283 3290 # rewrite the new index entries
3284 3291 ifh.seek(startrev * self.index.entry_size)
3285 3292 for i, e in enumerate(new_entries):
3286 3293 rev = startrev + i
3287 3294 self.index.replace_sidedata_info(rev, *e)
3288 3295 packed = self.index.entry_binary(rev)
3289 3296 if rev == 0 and self._docket is None:
3290 3297 header = self._format_flags | self._format_version
3291 3298 header = self.index.pack_header(header)
3292 3299 packed = header + packed
3293 3300 ifh.write(packed)
@@ -1,138 +1,159
1 1 # Copyright Mercurial Contributors
2 2 #
3 3 # This software may be used and distributed according to the terms of the
4 4 # GNU General Public License version 2 or any later version.
5 5
6 6 import contextlib
7 7
8 8 from ..i18n import _
9 9 from .. import (
10 10 error,
11 11 util,
12 12 )
13 13
14 14
15 15 _MAX_CACHED_CHUNK_SIZE = 1048576 # 1 MiB
16 16
17 17 PARTIAL_READ_MSG = _(
18 18 b'partial read of revlog %s; expected %d bytes from offset %d, got %d'
19 19 )
20 20
21 21
22 22 def _is_power_of_two(n):
23 23 return (n & (n - 1) == 0) and n != 0
24 24
25 25
26 26 class randomaccessfile(object):
27 27 """Accessing arbitrary chunks of data within a file, with some caching"""
28 28
29 29 def __init__(
30 30 self,
31 31 opener,
32 32 filename,
33 33 default_cached_chunk_size,
34 34 initial_cache=None,
35 35 ):
36 36 # Required by bitwise manipulation below
37 37 assert _is_power_of_two(default_cached_chunk_size)
38 38
39 39 self.opener = opener
40 40 self.filename = filename
41 41 self.default_cached_chunk_size = default_cached_chunk_size
42 42 self.writing_handle = None # This is set from revlog.py
43 self.reading_handle = None
43 44 self._cached_chunk = b''
44 45 self._cached_chunk_position = 0 # Offset from the start of the file
45 46 if initial_cache:
46 47 self._cached_chunk_position, self._cached_chunk = initial_cache
47 48
48 49 def clear_cache(self):
49 50 self._cached_chunk = b''
50 51 self._cached_chunk_position = 0
51 52
52 53 def _open(self, mode=b'r'):
53 54 """Return a file object"""
54 55 return self.opener(self.filename, mode=mode)
55 56
56 57 @contextlib.contextmanager
57 58 def _open_read(self, existing_file_obj=None):
58 59 """File object suitable for reading data"""
59 60 # Use explicit file handle, if given.
60 61 if existing_file_obj is not None:
61 62 yield existing_file_obj
62 63
63 64 # Use a file handle being actively used for writes, if available.
64 65 # There is some danger to doing this because reads will seek the
65 66 # file. However, revlog._writeentry performs a SEEK_END before all
66 67 # writes, so we should be safe.
67 68 elif self.writing_handle:
68 69 yield self.writing_handle
69 70
71 elif self.reading_handle:
72 yield self.reading_handle
73
70 74 # Otherwise open a new file handle.
71 75 else:
72 76 with self._open() as fp:
73 77 yield fp
74 78
79 @contextlib.contextmanager
80 def reading(self):
81 """Context manager that keeps the file open for reading"""
82 if (
83 self.reading_handle is None
84 and self.writing_handle is None
85 and self.filename is not None
86 ):
87 with self._open() as fp:
88 self.reading_handle = fp
89 try:
90 yield
91 finally:
92 self.reading_handle = None
93 else:
94 yield
95
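A minimal sketch of the new context manager, assuming the `randomaccessfile` class above is in scope; the opener, sample file, and window size are made up for the example. Wrapping several `read_chunk()` calls in `reading()` lets every cache miss reuse one open handle instead of reopening the file each time:

import os
import tempfile


def simple_opener(name, mode=b'r'):
    """Toy stand-in for the repository opener: plain binary reads."""
    return open(name, 'rb')


fd, path = tempfile.mkstemp()
os.write(fd, b'0123456789' * 10)  # 100 bytes of sample data
os.close(fd)

raf = randomaccessfile(simple_opener, path, 16)  # 16-byte cache window
with raf.reading():  # one handle is shared by every cache miss below
    assert bytes(raf.read_chunk(3, 4)) == b'3456'
    assert bytes(raf.read_chunk(95, 5)) == b'56789'
os.unlink(path)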
75 96 def read_chunk(self, offset, length, existing_file_obj=None):
76 97 """Read a chunk of bytes from the file.
77 98
78 99 Accepts an absolute offset, length to read, and an optional existing
79 100 file handle to read from.
80 101
81 102 If an existing file handle is passed, it will be seeked and the
82 103 original seek position will NOT be restored.
83 104
84 105 Returns a str or buffer of raw byte data.
85 106
86 107 Raises if the requested number of bytes could not be read.
87 108 """
88 109 end = offset + length
89 110 cache_start = self._cached_chunk_position
90 111 cache_end = cache_start + len(self._cached_chunk)
91 112 # Is the requested chunk within the cache?
92 113 if cache_start <= offset and end <= cache_end:
93 114 if cache_start == offset and end == cache_end:
94 115 return self._cached_chunk # avoid a copy
95 116 relative_start = offset - cache_start
96 117 return util.buffer(self._cached_chunk, relative_start, length)
97 118
98 119 return self._read_and_update_cache(offset, length, existing_file_obj)
99 120
100 121 def _read_and_update_cache(self, offset, length, existing_file_obj=None):
101 122 # Cache data both forward and backward around the requested
102 123 # data, in a fixed size window. This helps speed up operations
103 124 # involving reading the revlog backwards.
104 125 real_offset = offset & ~(self.default_cached_chunk_size - 1)
105 126 real_length = (
106 127 (offset + length + self.default_cached_chunk_size)
107 128 & ~(self.default_cached_chunk_size - 1)
108 129 ) - real_offset
109 130 with self._open_read(existing_file_obj) as file_obj:
110 131 file_obj.seek(real_offset)
111 132 data = file_obj.read(real_length)
112 133
113 134 self._add_cached_chunk(real_offset, data)
114 135
115 136 relative_offset = offset - real_offset
116 137 got = len(data) - relative_offset
117 138 if got < length:
118 139 message = PARTIAL_READ_MSG % (self.filename, length, offset, got)
119 140 raise error.RevlogError(message)
120 141
121 142 if offset != real_offset or real_length != length:
122 143 return util.buffer(data, relative_offset, length)
123 144 return data
124 145
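The masking above relies on the chunk size being a power of two: `offset & ~(size - 1)` rounds the start of the read down to a window boundary, and adding one extra window before masking the end guarantees the cached span covers the whole request. A small worked example with hypothetical numbers:

CHUNK = 65536  # cache window size; must be a power of two
offset, length = 60000, 10000  # a request that straddles a window boundary

real_offset = offset & ~(CHUNK - 1)
real_length = ((offset + length + CHUNK) & ~(CHUNK - 1)) - real_offset

assert real_offset == 0  # start rounded down to a window boundary
assert real_length == 2 * CHUNK  # the read covers both touched windows
assert real_offset <= offset and offset + length <= real_offset + real_length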
125 146 def _add_cached_chunk(self, offset, data):
126 147 """Add to or replace the cached data chunk.
127 148
128 149 Accepts an absolute offset and the data that is at that location.
129 150 """
130 151 if (
131 152 self._cached_chunk_position + len(self._cached_chunk) == offset
132 153 and len(self._cached_chunk) + len(data) < _MAX_CACHED_CHUNK_SIZE
133 154 ):
134 155 # add to existing cache
135 156 self._cached_chunk += data
136 157 else:
137 158 self._cached_chunk = data
138 159 self._cached_chunk_position = offset