diff --git a/mercurial/debugcommands.py b/mercurial/debugcommands.py
--- a/mercurial/debugcommands.py
+++ b/mercurial/debugcommands.py
@@ -71,6 +71,7 @@ from . import (
     registrar,
     repair,
     repoview,
+    requirements,
     revlog,
     revset,
     revsetlang,
@@ -105,6 +106,7 @@ from .utils import (
 from .revlogutils import (
     deltas as deltautil,
     nodemap,
+    rewrite,
     sidedata,
 )
 
@@ -1451,6 +1453,63 @@ def debugfileset(ui, repo, expr, **opts)
         ui.write(b"%s\n" % f)
 
 
+@command(
+    b"debug-repair-issue6528",
+    [
+        (
+            b'',
+            b'to-report',
+            b'',
+            _(b'build a report of affected revisions to this file'),
+            _(b'FILE'),
+        ),
+        (
+            b'',
+            b'from-report',
+            b'',
+            _(b'repair revisions listed in this report file'),
+            _(b'FILE'),
+        ),
+    ]
+    + cmdutil.dryrunopts,
+)
+def debug_repair_issue6528(ui, repo, **opts):
+    """find affected revisions and repair them. See issue6528 for more details.
+
+    The `--to-report` and `--from-report` flags allow you to cache and reuse the
+    computation of affected revisions for a given repository across clones.
+    The report format is line-based (with empty lines ignored):
+
+    ```
+    <hex filenode>,... <filename>
+    ```
+
+    There can be multiple broken revisions per filelog, they are separated by
+    a comma with no spaces. The only space is between the revision(s) and the
+    filename.
+
+    Note that this does *not* mean that this repairs future affected revisions,
+    that needs a separate fix at the exchange level that hasn't been written yet
+    (as of 5.9rc0).
+    """
+    cmdutil.check_incompatible_arguments(
+        opts, 'to_report', ['from_report', 'dry_run']
+    )
+    dry_run = opts.get('dry_run')
+    to_report = opts.get('to_report')
+    from_report = opts.get('from_report')
+    # TODO maybe add filelog pattern and revision pattern parameters to help
+    # narrow down the search for users that know what they're looking for?
+
+    if requirements.REVLOGV1_REQUIREMENT not in repo.requirements:
+        msg = b"can only repair revlogv1 repositories, v2 is not affected"
+        raise error.Abort(_(msg))
+
+    rewrite.repair_issue6528(
+        ui, repo, dry_run=dry_run, to_report=to_report, from_report=from_report
+    )
+
+
 @command(b'debugformat', [] + cmdutil.formatteropts)
 def debugformat(ui, repo, **opts):
     """display format information about the current repository
diff --git a/mercurial/revlogutils/rewrite.py b/mercurial/revlogutils/rewrite.py
--- a/mercurial/revlogutils/rewrite.py
+++ b/mercurial/revlogutils/rewrite.py
@@ -7,6 +7,7 @@
 # This software may be used and distributed according to the terms of the
 # GNU General Public License version 2 or any later version.
 
+import binascii
 import contextlib
 import os
 
@@ -472,3 +473,241 @@ def _rewrite_censor(
     new_index_file.write(entry_bin)
     revlog._docket.index_end = new_index_file.tell()
     revlog._docket.data_end = new_data_file.tell()
+
+
+def _get_filename_from_filelog_index(path):
+    """Return the filename tracked by the filelog index at store `path`.
+
+    Drops the extension and the leading directory (the `data/` prefix).
+    Aborts if `path` contains no `/` to split on.
+    """
+    path_part = path.rsplit(b'.', 1)[0].split(b'/', 1)
+    if len(path_part) < 2:
+        msg = _(b"cannot recognize filelog from filename: '%s'")
+        msg %= path
+        raise error.Abort(msg)
+
+    return path_part[1]
+
+
+def _filelog_from_filename(repo, path):
+    """Returns the filelog for the given `path`. Stolen from `engine.py`"""
+
+    from .. import filelog  # avoid cycle
+
+    return filelog.filelog(repo.svfs, path)
+
+
+def _write_swapped_parents(repo, rl, rev, offset, fp):
+    """Swaps p1 and p2 and overwrites the revlog entry for `rev` in `fp`
+
+    `offset` is the position in `fp` where the entry is written. Raises
+    `ProgrammingError` if the repository lock is not held.
+    """
+    from ..pure import parsers  # avoid cycle
+
+    if repo._currentlock(repo._lockref) is None:
+        # Let's be paranoid about it
+        msg = "repo needs to be locked to rewrite parents"
+        raise error.ProgrammingError(msg)
+
+    index_format = parsers.IndexObject.index_format
+    entry = rl.index[rev]
+    new_entry = list(entry)
+    # Fields 5 and 6 of the entry are the two parents being swapped
+    new_entry[5], new_entry[6] = entry[6], entry[5]
+    # Only the first 8 fields are packed; presumably the ones stored on disk
+    # in the v1 format (TODO confirm against `parsers.IndexObject`)
+    packed = index_format.pack(*new_entry[:8])
+    fp.seek(offset)
+    fp.write(packed)
+
+
+def _reorder_filelog_parents(repo, fl, to_fix):
+    """
+    Swaps p1 and p2 for all `to_fix` revisions of filelog `fl` and writes the
+    new version to disk, overwriting the old one with a rename.
+
+    `to_fix` is a non-empty collection of revision numbers of `fl`. The index
+    is patched in a temporary copy, which is removed again if anything fails
+    before the rename.
+    """
+    from ..pure import parsers  # avoid cycle
+
+    ui = repo.ui
+    assert len(to_fix) > 0
+    rl = fl._revlog
+    if rl._format_version != constants.REVLOGV1:
+        msg = "expected version 1 revlog, got version '%d'" % rl._format_version
+        raise error.ProgrammingError(msg)
+
+    index_file = rl._indexfile
+    new_file_path = index_file + b'.tmp-parents-fix'
+    repaired_msg = _(b"repaired revision %d of 'filelog %s'\n")
+
+    with ui.uninterruptible():
+        try:
+            util.copyfile(
+                rl.opener.join(index_file),
+                rl.opener.join(new_file_path),
+                checkambig=rl._checkambig,
+            )
+
+            with rl.opener(new_file_path, mode=b"r+") as fp:
+                if rl._inline:
+                    index = parsers.InlinedIndexObject(fp.read())
+                    for rev in to_fix:
+                        offset = index._calculate_index(rev)
+                        _write_swapped_parents(repo, rl, rev, offset, fp)
+                        ui.write(repaired_msg % (rev, index_file))
+                else:
+                    index_format = parsers.IndexObject.index_format
+                    for rev in to_fix:
+                        offset = rev * index_format.size
+                        _write_swapped_parents(repo, rl, rev, offset, fp)
+                        ui.write(repaired_msg % (rev, index_file))
+
+            rl.opener.rename(new_file_path, index_file)
+            rl.clearcaches()
+            rl._loadindex()
+        finally:
+            # `new_file_path` is relative to the opener, so join it to remove
+            # the temporary file from where it was actually created
+            util.tryunlink(rl.opener.join(new_file_path))
+
+
+def _is_revision_affected(ui, fl, filerev, path):
+    """Mercurial currently (5.9rc0) uses `p1 == nullrev and p2 != nullrev` as a
+    special meaning compared to the reverse in the context of filelog-based
+    copytracing. issue6528 exists because new code assumed that parent ordering
+    didn't matter, so this detects if the revision contains metadata (since
+    it's only used for filelog-based copytracing) and its parents are in the
+    "wrong" order."""
+    try:
+        raw_text = fl.rawdata(filerev)
+    except error.CensoredNodeError:
+        # We don't care about censored nodes as they never carry metadata
+        return False
+    has_meta = raw_text.startswith(b'\x01\n')
+    if has_meta:
+        (p1, p2) = fl.parentrevs(filerev)
+        if p1 != nullrev and p2 == nullrev:
+            return True
+    return False
+
+
+def _from_report(ui, repo, context, from_report, dry_run):
+    """
+    Fix the revisions given in the `from_report` file, but still checks if the
+    revisions are indeed affected to prevent an unfortunate cyclic situation
+    where we'd swap well-ordered parents again.
+
+    See the doc for `debug_repair_issue6528` for the format documentation.
+    """
+    ui.write(_(b"loading report file '%s'\n") % from_report)
+
+    with context(), open(from_report, mode='rb') as f:
+        for line in f.read().split(b'\n'):
+            if not line:
+                continue
+            filenodes, filename = line.split(b' ', 1)
+            fl = _filelog_from_filename(repo, filename)
+            to_fix = {
+                fl.rev(binascii.unhexlify(n)) for n in filenodes.split(b',')
+            }
+            excluded = set()
+
+            for filerev in to_fix:
+                if _is_revision_affected(ui, fl, filerev, filename):
+                    msg = b"found affected revision %d for filelog '%s'\n"
+                    ui.warn(msg % (filerev, filename))
+                else:
+                    msg = _(b"revision %s of file '%s' is not affected\n")
+                    msg %= (binascii.hexlify(fl.node(filerev)), filename)
+                    ui.warn(msg)
+                    excluded.add(filerev)
+
+            to_fix = to_fix - excluded
+            if not to_fix:
+                msg = _(b"no affected revisions were found for '%s'\n")
+                ui.write(msg % filename)
+                continue
+            if not dry_run:
+                _reorder_filelog_parents(repo, fl, sorted(to_fix))
+
+
+def repair_issue6528(ui, repo, dry_run=False, to_report=None, from_report=None):
+    """Find the filelog revisions affected by issue6528 and repair them.
+
+    With `from_report`, only the revisions listed in that report file are
+    checked and repaired. Otherwise every filelog of the store is scanned:
+    with `to_report` the affected filenodes are written to that file instead
+    of being repaired, and with `dry_run` they are only displayed.
+    """
+    from .. import store  # avoid cycle
+
+    @contextlib.contextmanager
+    def context():
+        if dry_run or to_report:  # No need for locking
+            yield
+        else:
+            with repo.wlock(), repo.lock():
+                yield
+
+    if from_report:
+        return _from_report(ui, repo, context, from_report, dry_run)
+
+    report_entries = []
+
+    with context():
+        files = [
+            path
+            for (file_type, path, _e, _s) in repo.store.datafiles()
+            if path.endswith(b'.i') and file_type & store.FILEFLAGS_FILELOG
+        ]
+
+        progress = ui.makeprogress(
+            _(b"looking for affected revisions"),
+            unit=_(b"filelogs"),
+            total=len(files),
+        )
+        found_nothing = True
+
+        for path in files:
+            progress.increment()
+            filename = _get_filename_from_filelog_index(path)
+            fl = _filelog_from_filename(repo, filename)
+
+            # Set of filerevs (or hex filenodes if `to_report`) that need fixing
+            to_fix = set()
+            for filerev in fl.revs():
+                # TODO speed up by looking at the start of the delta
+                # If it hasn't changed, it's not worth looking at the other revs
+                # in the same chain
+                affected = _is_revision_affected(ui, fl, filerev, path)
+                if affected:
+                    msg = b"found affected revision %d for filelog '%s'\n"
+                    ui.warn(msg % (filerev, path))
+                    found_nothing = False
+                    if not dry_run:
+                        if to_report:
+                            to_fix.add(binascii.hexlify(fl.node(filerev)))
+                        else:
+                            to_fix.add(filerev)
+
+            if to_fix:
+                to_fix = sorted(to_fix)
+                if to_report:
+                    report_entries.append((filename, to_fix))
+                else:
+                    _reorder_filelog_parents(repo, fl, to_fix)
+
+        if found_nothing:
+            ui.write(_(b"no affected revisions were found\n"))
+
+        if to_report and report_entries:
+            with open(to_report, mode="wb") as f:
+                for filename, to_fix in report_entries:
+                    f.write(b"%s %s\n" % (b",".join(to_fix), filename))
+
+        progress.complete()
diff --git a/tests/bundles/issue6528.tar b/tests/bundles/issue6528.tar
new file mode 100644
index
0000000000000000000000000000000000000000..f92105258524076c68bbb0fadb3827fcf801edd3 GIT binary patch literal 61440 zc%1EB30Txt*A4?Hj^GAr(N^NFD3D~b%!pPHL~Ruims&T>%w%9>7M#VkpMtwptF&sZ zqT*inx>s#~QCoMd)_p7GQ_)(BR&8smeEvyhfLWk3Z0W*%A8tZ2lbka-dC$4`=AN6- zFsRCbu%HNnTcZ?#*sco!p*V_Q1ja5<2tiTy4}yuRR}F&9MiXs-VTjhGCesF{)bF|a z{2z+Yu+ZeBD!vEGV1EQhG0y%tg#h~t0gL^|C}>48Q&qQt`}SwQhO+xnjKWA@e<5IJ zf3_%6)JA43qo_<7y!}~+?!NsAHUdB}QkhDW24H^&`)ky?lo0lsBr&E+lEE36#PKrN zpF{`}*k1^gZ2!uY!SkP@%3yzr!X*%lRnh%u2Vj2(`|I>o;s49Vf7bs4{tE(&{Sicw zOzV;ujXo(LU`8`K6KpoB zb?lC02G(nouz^X{!y0;=-drWV7(j4X_+O>Xzw)&I2nhw+e?q|4{sy@sL`Q2GW5{T< zGG(}q|CEFMagxA*{}%#EMn#)7z?)ZBIP#wj`;|?9m;6Va{69`$p#Mh@{IYg!GPkC> z#82YZ^ia)NZoTq!hnw6w`(=I(x8Blt;}6{Wbep~j+&cWhf%v~dUx3PmE&r2~Omvb_ zrPC`JV`X{(SNxYaxBoHF|0@g%`R|BAf8dBh|KNy1f8mHi|KW&2f8vNj|KbSXZ>k2i z{O8z@Re=nb^?yR*z<&vW0R9UC3YN$jS}rFLDS_fjr3yzd34@R-CEH>rNhzwr2ppHt z3@ZMsRRyR5IPpKk$QVX5%F2{ccK+iO3Htv90pNf2hAsc)23n^`#zID`jcU23@@{pP z?SBdBSpUZ*IN-k!aF_isC3gF(df3{l2wcYB9)H6BkYu$|$>=IBqwM@ANs<8k7XpC) z)f*-8-_imItt|SNhyMfy#(xL`!2jxv()nL`GRjf^$4CnBUkCvHS8tTWf3;R?Hqn*W z15ghBQ(*kRFaZ3o-YALxa@ts__j%d*k8|l_fd2vj@V|P)`T1XE_x_jX`5%EwB!K@y z06hPz-mviBtW)Ykm1+aa1ykkU;;#AcH2#jf>EDw5x>fg1sb*uc*`Q&6gVC7GhQpCcJriNQKA}3s#QrLNpD~0zNeX+Q zb{zj7T!^w9$p5P11H+Q_T896U^_%?XV(}2OdKh~EH!lNB>y$8~rPUg^okp*qHQXgC zx+E6cb%gb)CbeEiYua~&nb4$A_E@u#G4RKCf#0H8nbCR%?rf)`La%)j1o7$!i~p~( zraupt@gH@X|AoXs|GyvrPwJ{SEc~ysu74gb+y5AD!+(@-|AYJ=VZi0pPYby9O!Q`r z0`tJ+pw`iya!dN24uaCxupF=up#GQ{i6!!o6~CuaNo!R1DVke`=C}Rx)y0 zkz!5_9ff0~ReFu8vL=M%d-xH=jrM=x{O<(Cwg15UZ&i;!wZDNGWmX%QigpOO_Wu$G z`%?r4+W&$;sW&k?gsRXTnYkT{9aF$P+rWHcsPY+y!f^hqIX`!JQg4~%TXGGx5b zq_plIjf#z_w1I72@|$8K+X7`kRq~&NKjlAf&x%I@-v5{B{T~#HNPzzr0&dtJLA10^ ztzwL(a?TCy!vAwdx3B+WC}#vQHmldnKd{dcJe%|g_jw0 z_IdP$Q(b-;PeecUZkFC9dY$gm667hBG6Kahg_4pf2m&P~7{cz7;3}m;hEfcnQjoM9 zlVBEkBkbgftc8~s-g4wa<&v(CB3~_^)AUVW-77aqkr3lH$8G;)uDaU9z zDV0iP5`_{&QAEaIQUYan$}IBc+sX5^7EY$eMX&gU0kF@WeP8v?t!Kzizox#ls|0y8 zA(cx=T1m=D0w+)zAtNwKCP8E}5|t=enWRcWkP^8?UN*lRZY`WFFK*5KzN`D2UO4~o 
zz=|e!e-C!{DZuU+nHRG5J>B@VM#XxX#fx-rU;ni(W$}-1r^)!La${@}`sDk3_w^ zqP<|T2x`4ByY>j)svgaFK?AMYd0UGYr`?^$Q(xcLCgFL&`Ffuf)VdkHf7{k5C+&u7 zMbL-d_5D9fuakZC_}Rt(SvPja(go*gbv;va@~NPl%LRt-S9}vatmRjq*H!n(-#>gy zR*N0SCoNxnQ@{4s6!EVGDr z$+sbQmaq1n$8Axg{lWilU`BU>-EI(ih>JaloHmGyRDqyawiHeV%~gYW9}=7vx$Gr+C1w}iX=r^`=&m;5IYi6j3p49x!` z3`&mwP-VvdqRyT2#Q)d%Wk1N`3b;O$qPj_|2R|+yvFeAot$ADKuCH&e_rFqyAKZR*pxi;qchFxEpOL1 zc%SXzMI586HvV%g<9*25+2ocKrhDMK3FDAY=+4a>Jsnup@qY!sv~@v6)uZMk6Yqci zQ}Ev91sL(itxuPn-`>^k<+-I@9t>w_MlO?+2#ql^Ttzbs>yV^MnnFm1Vk9b*Ay5^@ zl;(n-@XI*146v$Hv(Rb};=)=fPOsguEC;7OI?TCy^!g#Q%L^nSWDLy;r!kUHDsZ_R zr)4BXsZfRAj{imVfA0NIqOiqdpn;9> ziH{pzf3j@UL)*jor%v&$MUcq5UcmhH!1#m_t=EUdFOF%@9?NPteQ|>>yT*=M;W;F7 z3!Mw?jP9~a`flLv>^?KsFaCg@@r`kv_q;nRrmh$nt-tyXTC7HKw6$wLa4)5bNxE{Y z>B=782dt_MKX{z|jjb|fHe4|(DUK^83b{n7A}E@@k#Lm+Ay7t&N|ZQGlO#h+#atSa z7=aP7B`%8`h_dcwGhEFaxjUJKfbujd3 z=FmegoUV`Nk_bB7-M{ht?lrbG7$T9+^lu-l%DNQSb>PDeM`sLGf=K10T&BQr z1;f_MDNKP#QKgKODG51VeAVPdSB)_wF^P&!utKlZs!cGz%`4QOKev`G;~>lP=%Q!R z+&jU1epz0>hsZDM;HA#*%)Gob!|9rhYwZa=ncgUH{)Sq~Ice}OAKiR1FC3qDdC~mq z6OLYOvQnfEiy7UqRR`VV!1y=j=)-*SzwFfwYO|nYu5ZZU_rKln=qGR4l)MkFCN>*( z%*QA8`munNyW8iOw?+}#jos9PYX*y;i0HzrcZiqEDb6Lq*gc}=UR3r-_1cH`Hw|4s z;Y#GTvwOdZb5b_>t_b?JdqCj)^?s>|%Z7{Ewr~IY!Gr~>UHFF{E%Uzr#3M0mg68Ur z@sauOzkM&OMqKQwTA2Z{J9ckt{MF-gAH-8>8-E(MX36siUG(A;OJbI$Wxw8TZIi1T zMItD<&Z(i^d?icNhL<;nE!@?zdt%MKHSPPiUxb`*lXkoplG5jN(s7xSk{M=CXjKe* zMc0c{6a6OFJ^JP>{6e?yPK1VH%>uSw>E3Ws*vJcAUJd`CH|}ZdxkD-O*!S(BX$O=_ z|DbQX?e=;(L3VDP%5HxN0rJ0vfs_3! 
zGSIsexbpwS914ddM#g_VqrOTF?(kum9F2rs?~J zuj|Q86Z~_2kX@Yk<+U5wir*(?j`?-kn3nBlEKdDz*5=o#wF3r-F1F0(^ZR68XqS0={HZoC#$SGQ;(+FIzd^>igFEhg z)GbEcxOh&InSeZ&3S3T{y~0c8(W3|J>r*O@HarkVe9`ZJke7ra5erH zk(5yX1Nr~L0L=gNZ<+sT2vm;wpV$C(@_=79?AyKX$mzY1pr#KV);}@*UafZ6#8pFo zFi}$vJ~bvDn>f1N8y5zzy7oorkybSW=ho>PJNs78sSNz!;63p)zuRj9SML{X-W0a* zt9k)#@h5Yhiw26Da`OR09!+^E2~asEf8tcz=a+`~LJ=dUN6tzdDw%aVNhCg6^ZfGs zPWkt~!{)Y)Tr(Jni+nApxp{h@koLKw!~k&-)l?Q+U8Hc%2AKqh7YO{Zc?|vBRRmo z7WVPKf{8w*z-9gq?il|Ggaprjgh2)Ke^>`}f^FbL_;_cB4>*@r<7!+tBCpp~(a<`J z^#2`ttTWu_%h_?t71<9*9_fB%>V{o;*rLDz`7P+?nZGpZJCqtd{7}tfAANeK>ziFO z8g(pEqqxJTO!+^oe?Ac|<|4+qF6)nbFIco@_uh=fb)_?>1U~z9vaarMzh7QE9kV~9 z`;e(kjPdGAJz6UkeK)Men#5Y(wU1|x5#1l3x#!r<;MnK>2giGyNqlVqe^yI>S0Yr7 z>>n;*pqxSx=Xr%jUSG971Mic)QoJNmA9nHX(-}ROb*F=6TYIl-)OP29#-E7Q^I>#$ zLTs&}`Wa~+W1k&c`E0W0<7PKa{;xkyyt7&!{-Z8z^z4c9dqLNdo8RDN6ck;rze(m# zEPrm!fs{G(hrOFlMT$kJZ%pogVl!&D^Sj+Y>8Hq(EAYsAA*a25e(rhey-mOU5!L+q z$Dz87Lww$w+o#2%1dk2B-54CTDgKX){-2+Fy5v=>-sZP12h7-;5tg&`=V{t8Yo7KF z%sIQ_XXC)DOO21;i~F{F8y&Cc-Xg=6IqS#Ku>Duxe;E1Jb^LzTWYxJFwavdDxz~_f zBE5r;xqa?hz@VtC=yiCCDz-(NX%ikB!s|8YxTR~Igxa$F^}1h=UbtfTdXC@!JUc|m z0y-2x(giRtO4Nv#Gzuz5zR%c9Vh&p8Fyh;N%!G zG`Jp=Ju&f6ZQtMMpSsTMID2rt1~(7iTY9cHI(YoHWk*MR6R@=3+N0gjK`r_#`^*n% zHEg!Wx{o8pwT>MPih5U+7yEQo=jZD_8P!-ie$t@L1s>bAuj3HFpTRc%S2)q96u6)N zQ`G)PC1Cz9K>+f9syA%;U*SZbQs9#RsN?e=k{c2T_%8r3knmHj;mH50kl~8|#rZ$Q z<9|_*|0@V8lHlWzE996;^jH5h05{+i|0_J(lQUe#f9LnVkzoFBVPI?jDlk0{_w8Sp z|A|u)V1Geiv417K=&uyGv_It(|1mKBOBlHI{y*@Zzke-4Ew#NWmEn^An4|wkDe(Sh zVNl}ivH;Zvp $TESTTMP/ext/small_inline.py + > from mercurial import revlog + > revlog._maxinline = 8 + > EOF + + $ cat << EOF >> $HGRCPATH + > [extensions] + > small_inline=$TESTTMP/ext/small_inline.py + > EOF + + $ mkdir repo-to-fix-not-inline + $ cd repo-to-fix-not-inline +#if windows +tar interprets `:` in paths (like `C:`) as being remote, force local on Windows +only since some versions of tar don't have this flag. 
+ + $ tar --force-local -xf $TESTDIR/bundles/issue6528.tar +#else + $ tar xf $TESTDIR/bundles/issue6528.tar +#endif + $ echo b >> b.txt + $ hg commit -qm "inline -> separate" + $ find .hg -name *b.txt.d + .hg/store/data/b.txt.d + +Status is correct, but the problem is still there, in the earlier revision + $ hg st + $ hg up 3 + 1 files updated, 0 files merged, 1 files removed, 0 files unresolved + $ hg st + M b.txt + $ hg debugrevlogindex b.txt + rev linkrev nodeid p1 p2 + 0 2 05b806ebe5ea 000000000000 000000000000 + 1 3 a58b36ad6b65 05b806ebe5ea 000000000000 + 2 6 216a5fe8b8ed 000000000000 000000000000 + 3 7 ea4f2f2463cc 216a5fe8b8ed 000000000000 + 4 8 db234885e2fe ea4f2f2463cc 000000000000 + $ hg debugrevlogindex D.txt + rev linkrev nodeid p1 p2 + 0 6 2a8d3833f2fb 000000000000 000000000000 + 1 7 2a80419dfc31 2a8d3833f2fb 000000000000 + 2 8 65aecc89bb5d 2a80419dfc31 000000000000 + +Run the fix on the non-inline revlog + $ hg debug-repair-issue6528 + found affected revision 1 for filelog 'data/D.txt.i' + repaired revision 1 of 'filelog data/D.txt.i' + found affected revision 1 for filelog 'data/b.txt.i' + found affected revision 3 for filelog 'data/b.txt.i' + repaired revision 1 of 'filelog data/b.txt.i' + repaired revision 3 of 'filelog data/b.txt.i' + +Check that it worked + $ hg debugrevlogindex b.txt + rev linkrev nodeid p1 p2 + 0 2 05b806ebe5ea 000000000000 000000000000 + 1 3 a58b36ad6b65 000000000000 05b806ebe5ea + 2 6 216a5fe8b8ed 000000000000 000000000000 + 3 7 ea4f2f2463cc 000000000000 216a5fe8b8ed + 4 8 db234885e2fe ea4f2f2463cc 000000000000 + $ hg debugrevlogindex D.txt + rev linkrev nodeid p1 p2 + 0 6 2a8d3833f2fb 000000000000 000000000000 + 1 7 2a80419dfc31 000000000000 2a8d3833f2fb + 2 8 65aecc89bb5d 2a80419dfc31 000000000000 + $ hg debug-repair-issue6528 + no affected revisions were found + $ hg st