##// END OF EJS Templates
issue6528: also filter delta on the fly when applying a changegroup...
marmoute -
r48629:c30ca163 stable
parent child Browse files
Show More
1 NO CONTENT: new file 100644, binary diff hidden
1 NO CONTENT: new file 100644, binary diff hidden
@@ -1,287 +1,291 b''
1 1 # filelog.py - file history class for mercurial
2 2 #
3 3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 from .i18n import _
11 11 from .node import nullrev
12 12 from . import (
13 13 error,
14 14 revlog,
15 15 )
16 16 from .interfaces import (
17 17 repository,
18 18 util as interfaceutil,
19 19 )
20 20 from .utils import storageutil
21 21 from .revlogutils import (
22 22 constants as revlog_constants,
23 rewrite,
23 24 )
24 25
25 26
@interfaceutil.implementer(repository.ifilestorage)
class filelog(object):
    """File history storage built on top of a revlog.

    The instance owns a censorable ``revlog.revlog`` (``self._revlog``) and
    forwards most of its API to it. A handful of methods add file specific
    behavior on top: metadata packing (``add``), metadata filtering
    (``read``), and copy/censor aware sizing (``size``).
    """

    def __init__(self, opener, path):
        """Open the revlog that stores the history of ``path``."""
        self._revlog = revlog.revlog(
            opener,
            # XXX should use the unencoded path
            target=(revlog_constants.KIND_FILELOG, path),
            radix=b'/'.join((b'data', path)),
            censorable=True,
        )
        # Full name of the user visible file, relative to the repository root.
        # Used by LFS.
        self._revlog.filename = path
        self.nullid = self._revlog.nullid

    def __len__(self):
        """Number of revisions in the underlying revlog."""
        return len(self._revlog)

    def __iter__(self):
        """Iterate the same way the underlying revlog does."""
        return self._revlog.__iter__()

    def hasnode(self, node):
        """Tell whether ``node`` is stored here.

        The null identifiers are never reported as present, and any lookup
        failure is turned into ``False`` instead of propagating.
        """
        if node in (self.nullid, nullrev):
            return False

        try:
            self._revlog.rev(node)
        except (TypeError, ValueError, IndexError, error.LookupError):
            return False
        return True

    def revs(self, start=0, stop=None):
        """Forward to ``revlog.revs``."""
        return self._revlog.revs(start=start, stop=stop)

    def parents(self, node):
        """Forward to ``revlog.parents``."""
        return self._revlog.parents(node)

    def parentrevs(self, rev):
        """Forward to ``revlog.parentrevs``."""
        return self._revlog.parentrevs(rev)

    def rev(self, node):
        """Forward to ``revlog.rev``."""
        return self._revlog.rev(node)

    def node(self, rev):
        """Forward to ``revlog.node``."""
        return self._revlog.node(rev)

    def lookup(self, node):
        """Resolve ``node`` through ``storageutil.fileidlookup``."""
        rl = self._revlog
        return storageutil.fileidlookup(rl, node, rl.display_id)

    def linkrev(self, rev):
        """Forward to ``revlog.linkrev``."""
        return self._revlog.linkrev(rev)

    def commonancestorsheads(self, node1, node2):
        """Forward to ``revlog.commonancestorsheads``."""
        return self._revlog.commonancestorsheads(node1, node2)

    # Used by dagop.blockdescendants().
    def descendants(self, revs):
        """Forward to ``revlog.descendants``."""
        return self._revlog.descendants(revs)

    def heads(self, start=None, stop=None):
        """Forward to ``revlog.heads``."""
        return self._revlog.heads(start, stop)

    # Used by hgweb, children extension.
    def children(self, node):
        """Forward to ``revlog.children``."""
        return self._revlog.children(node)

    def iscensored(self, rev):
        """Forward to ``revlog.iscensored``."""
        return self._revlog.iscensored(rev)

    def revision(self, node, _df=None, raw=False):
        """Forward to ``revlog.revision``."""
        return self._revlog.revision(node, _df=_df, raw=raw)

    def rawdata(self, node, _df=None):
        """Forward to ``revlog.rawdata``."""
        return self._revlog.rawdata(node, _df=_df)

    def emitrevisions(
        self,
        nodes,
        nodesorder=None,
        revisiondata=False,
        assumehaveparentrevisions=False,
        deltamode=repository.CG_DELTAMODE_STD,
        sidedata_helpers=None,
    ):
        """Forward to ``revlog.emitrevisions`` with every option unchanged."""
        return self._revlog.emitrevisions(
            nodes,
            nodesorder=nodesorder,
            revisiondata=revisiondata,
            assumehaveparentrevisions=assumehaveparentrevisions,
            deltamode=deltamode,
            sidedata_helpers=sidedata_helpers,
        )

    def addrevision(
        self,
        revisiondata,
        transaction,
        linkrev,
        p1,
        p2,
        node=None,
        flags=revlog.REVIDX_DEFAULT_FLAGS,
        cachedelta=None,
    ):
        """Forward to ``revlog.addrevision`` with every option unchanged."""
        return self._revlog.addrevision(
            revisiondata,
            transaction,
            linkrev,
            p1,
            p2,
            node=node,
            flags=flags,
            cachedelta=cachedelta,
        )

    def addgroup(
        self,
        deltas,
        linkmapper,
        transaction,
        addrevisioncb=None,
        duplicaterevisioncb=None,
        maybemissingparents=False,
    ):
        """Add a group of incoming deltas to the underlying revlog.

        Aborts when ``maybemissingparents`` is requested, as that mode is
        not supported here. The incoming deltas go through
        ``rewrite.filter_delta_issue6528`` before reaching the revlog.
        """
        if maybemissingparents:
            msg = _(
                b'revlog storage does not support missing '
                b'parents write mode'
            )
            raise error.Abort(msg)

        rl = self._revlog
        with rl._writing(transaction):
            # filter the stream on the fly (issue6528) before storing it
            filtered = rewrite.filter_delta_issue6528(rl, deltas)

            return rl.addgroup(
                filtered,
                linkmapper,
                transaction,
                addrevisioncb=addrevisioncb,
                duplicaterevisioncb=duplicaterevisioncb,
            )

    def getstrippoint(self, minlink):
        """Forward to ``revlog.getstrippoint``."""
        return self._revlog.getstrippoint(minlink)

    def strip(self, minlink, transaction):
        """Forward to ``revlog.strip``."""
        return self._revlog.strip(minlink, transaction)

    def censorrevision(self, tr, node, tombstone=b''):
        """Forward to ``revlog.censorrevision``."""
        return self._revlog.censorrevision(tr, node, tombstone=tombstone)

    def files(self):
        """Forward to ``revlog.files``."""
        return self._revlog.files()

    def read(self, node):
        """Return the revision of ``node`` with its metadata filtered out."""
        fulltext = self.revision(node)
        return storageutil.filtermetadata(fulltext)

    def add(self, text, meta, transaction, link, p1=None, p2=None):
        """Store ``text`` as a new revision and return its node.

        The text is packed together with ``meta`` when there is metadata, or
        when the text itself starts with the ``\\1\\n`` marker.
        """
        needs_packing = meta or text.startswith(b'\1\n')
        if needs_packing:
            text = storageutil.packmeta(meta, text)
        new_rev = self.addrevision(text, transaction, link, p1, p2)
        return self.node(new_rev)

    def renamed(self, node):
        """Forward to ``storageutil.filerevisioncopied``."""
        return storageutil.filerevisioncopied(self, node)

    def size(self, rev):
        """return the size of a given revision"""

        # for revisions with renames, we have to go the slow way
        node = self.node(rev)
        if self.renamed(node):
            return len(self.read(node))
        if self.iscensored(rev):
            return 0

        # XXX if self.read(node).startswith("\1\n"), this returns (size+4)
        return self._revlog.size(rev)

    def cmp(self, node, text):
        """compare text with a given file revision

        returns True if text is different than what is stored.
        """
        same = storageutil.filedataequivalent(self, node, text)
        return not same

    def verifyintegrity(self, state):
        """Forward to ``revlog.verifyintegrity``."""
        return self._revlog.verifyintegrity(state)

    def storageinfo(
        self,
        exclusivefiles=False,
        sharedfiles=False,
        revisionscount=False,
        trackedsize=False,
        storedsize=False,
    ):
        """Forward to ``revlog.storageinfo`` with every option unchanged."""
        return self._revlog.storageinfo(
            exclusivefiles=exclusivefiles,
            sharedfiles=sharedfiles,
            revisionscount=revisionscount,
            trackedsize=trackedsize,
            storedsize=storedsize,
        )

    # Used by repo upgrade.
    def clone(self, tr, destrevlog, **kwargs):
        """Clone the underlying revlog into the one of ``destrevlog``.

        ``destrevlog`` must itself be a ``filelog``; anything else is a
        programming error.
        """
        if not isinstance(destrevlog, filelog):
            raise error.ProgrammingError(b'expected filelog to clone()')

        return self._revlog.clone(tr, destrevlog._revlog, **kwargs)
238 242
239 243
class narrowfilelog(filelog):
    """Filelog variation to be used with narrow stores."""

    def __init__(self, opener, path, narrowmatch):
        """Same as ``filelog`` plus the matcher describing the narrowspec."""
        super(narrowfilelog, self).__init__(opener, path)
        self._narrowmatch = narrowmatch

    def renamed(self, node):
        """Report copy information, hiding sources outside the narrowspec."""
        copy_info = super(narrowfilelog, self).renamed(node)

        # Renames that come from outside the narrowspec are problematic
        # because we may lack the base text for the rename. This can result
        # in code attempting to walk the ancestry or compute a diff
        # encountering a missing revision. We address this by silently
        # removing rename metadata if the source file is outside the
        # narrow spec.
        #
        # A better solution would be to see if the base revision is available,
        # rather than assuming it isn't.
        #
        # An even better solution would be to teach all consumers of rename
        # metadata that the base revision may not be available.
        #
        # TODO consider better ways of doing this.
        if not copy_info or self._narrowmatch(copy_info[0]):
            return copy_info

        return None

    def size(self, rev):
        """Return the size of ``rev``, reading the text for real copies."""
        # Because we have a custom renamed() that may lie, we need to call
        # the base renamed() to report accurate results.
        node = self.node(rev)
        if not super(narrowfilelog, self).renamed(node):
            return super(narrowfilelog, self).size(rev)
        return len(self.read(node))

    def cmp(self, node, text):
        """Return True when ``text`` differs from what is stored."""
        # We don't call `super` because narrow parents can be buggy in case of a
        # ambiguous dirstate. Always take the slow path until there is a better
        # fix, see issue6150.

        # Censored files compare against the empty file.
        rev = self.rev(node)
        if self.iscensored(rev):
            return text != b''

        return self.read(node) != text
@@ -1,802 +1,886 b''
1 1 # censor code related to censoring revision
2 2 # coding: utf8
3 3 #
4 4 # Copyright 2021 Pierre-Yves David <pierre-yves.david@octobus.net>
5 5 # Copyright 2015 Google, Inc <martinvonz@google.com>
6 6 #
7 7 # This software may be used and distributed according to the terms of the
8 8 # GNU General Public License version 2 or any later version.
9 9
10 10 import binascii
11 11 import contextlib
12 12 import os
13 13 import struct
14 14
15 15 from ..node import (
16 16 nullrev,
17 17 )
18 18 from .constants import (
19 19 COMP_MODE_PLAIN,
20 20 ENTRY_DATA_COMPRESSED_LENGTH,
21 21 ENTRY_DATA_COMPRESSION_MODE,
22 22 ENTRY_DATA_OFFSET,
23 23 ENTRY_DATA_UNCOMPRESSED_LENGTH,
24 24 ENTRY_DELTA_BASE,
25 25 ENTRY_LINK_REV,
26 26 ENTRY_NODE_ID,
27 27 ENTRY_PARENT_1,
28 28 ENTRY_PARENT_2,
29 29 ENTRY_SIDEDATA_COMPRESSED_LENGTH,
30 30 ENTRY_SIDEDATA_COMPRESSION_MODE,
31 31 ENTRY_SIDEDATA_OFFSET,
32 REVIDX_ISCENSORED,
32 33 REVLOGV0,
33 34 REVLOGV1,
34 35 )
35 36 from ..i18n import _
36 37
37 38 from .. import (
38 39 error,
40 mdiff,
39 41 pycompat,
40 42 revlogutils,
41 43 util,
42 44 )
43 45 from ..utils import (
44 46 storageutil,
45 47 )
46 48 from . import (
47 49 constants,
48 50 deltas,
49 51 )
50 52
51 53
def v1_censor(rl, tr, censornode, tombstone=b''):
    """censors a revision in a "version 1" revlog

    Every revision of `rl` is copied into a temporary revlog, the revision
    of `censornode` being replaced by a tombstone flagged as censored. The
    temporary files are then renamed over the original ones, which are
    registered for backup in the transaction `tr` beforehand.
    """
    assert rl._format_version == constants.REVLOGV1, rl._format_version

    # avoid cycle
    from .. import revlog

    censorrev = rl.rev(censornode)
    tombstone = storageutil.packmeta({b'censored': tombstone}, b'')

    # Rewriting the revlog in place is hard. Our strategy for censoring is
    # to create a new revlog, copy all revisions to it, then replace the
    # revlogs on transaction close.
    #
    # This is a bit dangerous. We could easily have a mismatch of state.
    newrl = revlog.revlog(
        rl.opener,
        target=rl.target,
        radix=rl.radix,
        postfix=b'tmpcensored',
        censorable=True,
    )
    # make the copy use the same on-disk format as the source
    newrl._format_version = rl._format_version
    newrl._format_flags = rl._format_flags
    newrl._generaldelta = rl._generaldelta
    newrl._parse_index = rl._parse_index

    for rev in rl.revs():
        node = rl.node(rev)
        p1, p2 = rl.parents(node)

        if rev == censorrev:
            newrl.addrawrevision(
                tombstone,
                tr,
                rl.linkrev(censorrev),
                p1,
                p2,
                censornode,
                constants.REVIDX_ISCENSORED,
            )

            # the tombstone must have been stored as a full text
            if newrl.deltaparent(rev) == nullrev:
                continue
            m = _(b'censored revision stored as delta; cannot censor')
            h = _(
                b'censoring of revlogs is not fully implemented;'
                b' please report this bug'
            )
            raise error.Abort(m, hint=h)

        if not rl.iscensored(rev):
            rawtext = rl.rawdata(rev)
        elif rl.deltaparent(rev) == nullrev:
            # already censored revision: copy its stored chunk
            rawtext = rl._chunk(rev)
        else:
            m = _(
                b'cannot censor due to censored '
                b'revision having delta stored'
            )
            raise error.Abort(m)

        newrl.addrawrevision(
            rawtext, tr, rl.linkrev(rev), p1, p2, node, rl.flags(rev)
        )

    tr.addbackup(rl._indexfile, location=b'store')
    if not rl._inline:
        tr.addbackup(rl._datafile, location=b'store')

    rl.opener.rename(newrl._indexfile, rl._indexfile)
    if not rl._inline:
        rl.opener.rename(newrl._datafile, rl._datafile)

    # drop everything cached from the replaced files
    rl.clearcaches()
    rl._loadindex()
128 130
129 131
def v2_censor(revlog, tr, censornode, tombstone=b''):
    """censors a revision in a "version 2" revlog

    The node is translated to its revision number and the actual work is
    delegated to `_rewrite_v2`.
    """
    version = revlog._format_version
    assert version != REVLOGV0, version
    assert version != REVLOGV1, version

    _rewrite_v2(revlog, tr, {revlog.rev(censornode)}, tombstone)
137 139
138 140
def _rewrite_v2(revlog, tr, censor_revs, tombstone=b''):
    """rewrite a revlog to censor some of its content

    General principle

    We create new revlog files (index/data/sidedata) to copy the content of
    the existing data without the censored data.

    We need to recompute new delta for any revision that used the censored
    revision as delta base. As the cumulative size of the new delta may be
    large, we store them in a temporary file until they are stored in their
    final destination.

    All data before the censored data can be blindly copied. The rest needs
    to be copied as we go and the associated index entry needs adjustment.

    Arguments:

    - `revlog`: the revlog to rewrite, must not be a version 0 or 1 revlog,
    - `tr`: accepted for API symmetry; it is not used by this function,
    - `censor_revs`: a non-empty collection of revision numbers to censor,
    - `tombstone`: the message packed as `censored` metadata in place of the
      censored content.
    """
    assert revlog._format_version != REVLOGV0, revlog._format_version
    assert revlog._format_version != REVLOGV1, revlog._format_version

    # Keep a reference to the full index: `_setup_new_files` reloads
    # `revlog.index` from the truncated copy.
    old_index = revlog.index
    docket = revlog._docket

    tombstone = storageutil.packmeta({b'censored': tombstone}, b'')

    first_excl_rev = min(censor_revs)

    # everything stored before the first censored revision is kept verbatim
    first_excl_entry = revlog.index[first_excl_rev]
    index_cutoff = revlog.index.entry_size * first_excl_rev
    data_cutoff = first_excl_entry[ENTRY_DATA_OFFSET] >> 16
    sidedata_cutoff = revlog.sidedata_cut_off(first_excl_rev)

    with pycompat.unnamedtempfile(mode=b"w+b") as tmp_storage:
        # rev -> (new_base, data_start, data_end, compression_mode)
        rewritten_entries = _precompute_rewritten_delta(
            revlog,
            old_index,
            censor_revs,
            tmp_storage,
        )

        all_files = _setup_new_files(
            revlog,
            index_cutoff,
            data_cutoff,
            sidedata_cutoff,
        )

        # we dont need to open the old index file since its content already
        # exist in a usable form in `old_index`.
        with all_files() as open_files:
            # Writing the censored revision(s) and all subsequent revisions
            for rev in range(first_excl_rev, len(old_index)):
                if rev in censor_revs:
                    _rewrite_censor(
                        revlog,
                        old_index,
                        open_files,
                        rev,
                        tombstone,
                    )
                else:
                    _rewrite_simple(
                        revlog,
                        old_index,
                        open_files,
                        rev,
                        rewritten_entries,
                        tmp_storage,
                    )
    docket.write(transaction=None, stripping=True)
219 221
220 222
def _precompute_rewritten_delta(
    revlog,
    old_index,
    excluded_revs,
    tmp_storage,
):
    """Compute new delta for revisions whose delta is based on revision that
    will not survive as is.

    The data of each new delta is appended to `tmp_storage`.

    Return a mapping: {rev -> (new_base, data_start, data_end, compression_mode)}
    where `data_start` and `data_end` are offsets within `tmp_storage`.
    """
    computer = deltas.deltacomputer(revlog)
    rewritten = {}
    first_excl_rev = min(excluded_revs)
    with revlog._segmentfile._open_read() as dfh:
        for rev in range(first_excl_rev, len(old_index)):
            if rev in excluded_revs:
                # excluded revisions get no recomputed delta here
                continue
            entry = old_index[rev]
            if entry[ENTRY_DELTA_BASE] not in excluded_revs:
                # the delta base is untouched, the stored delta stays valid
                continue

            # This is a revision that uses an excluded revision as the base
            # for its delta. We need a new delta.
            if entry[ENTRY_DATA_UNCOMPRESSED_LENGTH] == 0:
                # this revision is empty, we can delta against nullrev
                rewritten[rev] = (nullrev, 0, 0, COMP_MODE_PLAIN)
                continue

            text = revlog.rawdata(rev, _df=dfh)
            info = revlogutils.revisioninfo(
                node=entry[ENTRY_NODE_ID],
                p1=revlog.node(entry[ENTRY_PARENT_1]),
                p2=revlog.node(entry[ENTRY_PARENT_2]),
                btext=[text],
                textlen=len(text),
                cachedelta=None,
                flags=entry[ENTRY_DATA_OFFSET] & 0xFFFF,
            )
            found = computer.finddeltainfo(
                info, dfh, excluded_bases=excluded_revs, target_rev=rev
            )
            default_comp = revlog._docket.default_compression_header
            comp_mode, found = deltas.delta_compression(default_comp, found)
            # using `tell` is a bit lazy, but we are not here for speed
            start = tmp_storage.tell()
            tmp_storage.write(found.data[1])
            end = tmp_storage.tell()
            rewritten[rev] = (found.base, start, end, comp_mode)
    return rewritten
272 274
273 275
def _setup_new_files(
    revlog,
    index_cutoff,
    data_cutoff,
    sidedata_cutoff,
):
    """Create the new index/data/sidedata files and prepare their opening

    Each new file starts as a copy of the old one, limited to the number of
    bytes given by the matching `*_cutoff` argument. The docket end offsets
    are set to these cutoffs and the revlog index is reloaded from it.

    return a context manager to open all the relevant files:
    - old_data_file,
    - old_sidedata_file,
    - new_index_file,
    - new_data_file,
    - new_sidedata_file,

    The old_index_file is not here because it is accessed through the
    `old_index` object of the caller function.
    """
    docket = revlog._docket
    opener = revlog.opener

    # paths of the current files, resolved before new ones are requested
    old_index_filepath = opener.join(docket.index_filepath())
    old_data_filepath = opener.join(docket.data_filepath())
    old_sidedata_filepath = opener.join(docket.sidedata_filepath())

    new_index_filepath = opener.join(docket.new_index_file())
    new_data_filepath = opener.join(docket.new_data_file())
    new_sidedata_filepath = opener.join(docket.new_sidedata_file())

    util.copyfile(old_index_filepath, new_index_filepath, nb_bytes=index_cutoff)
    util.copyfile(old_data_filepath, new_data_filepath, nb_bytes=data_cutoff)
    util.copyfile(
        old_sidedata_filepath,
        new_sidedata_filepath,
        nb_bytes=sidedata_cutoff,
    )
    opener.register_file(docket.index_filepath())
    opener.register_file(docket.data_filepath())
    opener.register_file(docket.sidedata_filepath())

    docket.index_end = index_cutoff
    docket.data_end = data_cutoff
    docket.sidedata_end = sidedata_cutoff

    # reload the revlog internal information
    revlog.clearcaches()
    revlog._loadindex(docket=docket)

    @contextlib.contextmanager
    def all_files_opener():
        # hide opening in an helper function to please check-code, black
        # and various python version at the same time
        with open(old_data_filepath, 'rb') as old_data_file:
            with open(old_sidedata_filepath, 'rb') as old_sidedata_file:
                with open(new_index_filepath, 'r+b') as new_index_file:
                    with open(new_data_filepath, 'r+b') as new_data_file:
                        with open(
                            new_sidedata_filepath, 'r+b'
                        ) as new_sidedata_file:
                            # position each new file at its end, which
                            # must be the cutoff it was copied up to
                            new_index_file.seek(0, os.SEEK_END)
                            assert new_index_file.tell() == index_cutoff
                            new_data_file.seek(0, os.SEEK_END)
                            assert new_data_file.tell() == data_cutoff
                            new_sidedata_file.seek(0, os.SEEK_END)
                            assert new_sidedata_file.tell() == sidedata_cutoff
                            yield (
                                old_data_file,
                                old_sidedata_file,
                                new_index_file,
                                new_data_file,
                                new_sidedata_file,
                            )

    return all_files_opener
346 348
347 349
def _rewrite_simple(
    revlog,
    old_index,
    all_files,
    rev,
    rewritten_entries,
    tmp_storage,
):
    """append a normal revision to the index after the rewritten one(s)

    The revision data comes from `tmp_storage` when `rev` has an entry in
    `rewritten_entries`, and from the old data file otherwise. Sidedata, if
    any, is copied from the old sidedata file. The docket end offsets are
    updated to the new end of each file.
    """
    (
        old_data_file,
        old_sidedata_file,
        new_index_file,
        new_data_file,
        new_sidedata_file,
    ) = all_files
    entry = old_index[rev]
    # the offset field packs the data offset (high bits) with 16 flag bits
    flags = entry[ENTRY_DATA_OFFSET] & 0xFFFF
    old_data_offset = entry[ENTRY_DATA_OFFSET] >> 16

    if rev in rewritten_entries:
        # a new delta was precomputed for this revision
        delta_base, tmp_start, tmp_end, data_comp_mode = rewritten_entries[rev]
        data_size = tmp_end - tmp_start
        tmp_storage.seek(tmp_start)
        payload = tmp_storage.read(data_size)
    else:
        # the stored data can be copied verbatim
        old_data_file.seek(old_data_offset)
        data_size = entry[ENTRY_DATA_COMPRESSED_LENGTH]
        payload = old_data_file.read(data_size)
        delta_base = entry[ENTRY_DELTA_BASE]
        data_comp_mode = entry[ENTRY_DATA_COMPRESSION_MODE]

    # It might be faster to group continuous read/write operation,
    # however, this is censor, an operation that is not focussed
    # around stellar performance. So I have not written this
    # optimisation yet.
    new_data_offset = new_data_file.tell()
    new_data_file.write(payload)

    sidedata_size = entry[ENTRY_SIDEDATA_COMPRESSED_LENGTH]
    new_sidedata_offset = new_sidedata_file.tell()
    if sidedata_size > 0:
        old_sidedata_file.seek(entry[ENTRY_SIDEDATA_OFFSET])
        new_sidedata_file.write(old_sidedata_file.read(sidedata_size))

    data_uncompressed_length = entry[ENTRY_DATA_UNCOMPRESSED_LENGTH]
    sidedata_comp_mode = entry[ENTRY_SIDEDATA_COMPRESSION_MODE]
    assert delta_base <= rev, (delta_base, rev)

    new_entry = revlogutils.entry(
        flags=flags,
        data_offset=new_data_offset,
        data_compressed_length=data_size,
        data_uncompressed_length=data_uncompressed_length,
        data_delta_base=delta_base,
        link_rev=entry[ENTRY_LINK_REV],
        parent_rev_1=entry[ENTRY_PARENT_1],
        parent_rev_2=entry[ENTRY_PARENT_2],
        node_id=entry[ENTRY_NODE_ID],
        sidedata_offset=new_sidedata_offset,
        sidedata_compressed_length=sidedata_size,
        data_compression_mode=data_comp_mode,
        sidedata_compression_mode=sidedata_comp_mode,
    )
    revlog.index.append(new_entry)
    new_index_file.write(revlog.index.entry_binary(rev))

    docket = revlog._docket
    docket.index_end = new_index_file.tell()
    docket.data_end = new_data_file.tell()
    docket.sidedata_end = new_sidedata_file.tell()
426 428
427 429
def _rewrite_censor(
    revlog,
    old_index,
    all_files,
    rev,
    tombstone,
):
    """rewrite and append a censored revision

    The tombstone is written uncompressed as the full data of `rev`, the new
    index entry is flagged as censored, and the docket index and data end
    offsets are updated.
    """
    (
        _old_data_file,
        _old_sidedata_file,
        new_index_file,
        new_data_file,
        _new_sidedata_file,
    ) = all_files
    entry = old_index[rev]

    # XXX consider trying the default compression too
    tombstone_size = len(tombstone)
    tombstone_offset = new_data_file.tell()
    new_data_file.write(tombstone)

    # we are not adding any sidedata as they might leak info about the censored version

    new_entry = revlogutils.entry(
        flags=constants.REVIDX_ISCENSORED,
        data_offset=tombstone_offset,
        data_compressed_length=tombstone_size,
        data_uncompressed_length=tombstone_size,
        # the entry is its own delta base
        data_delta_base=rev,
        link_rev=entry[ENTRY_LINK_REV],
        parent_rev_1=entry[ENTRY_PARENT_1],
        parent_rev_2=entry[ENTRY_PARENT_2],
        node_id=entry[ENTRY_NODE_ID],
        sidedata_offset=0,
        sidedata_compressed_length=0,
        data_compression_mode=COMP_MODE_PLAIN,
        sidedata_compression_mode=COMP_MODE_PLAIN,
    )
    revlog.index.append(new_entry)
    new_index_file.write(revlog.index.entry_binary(rev))

    docket = revlog._docket
    docket.index_end = new_index_file.tell()
    docket.data_end = new_data_file.tell()
477 479
478 480
479 481 def _get_filename_from_filelog_index(path):
480 482 # Drop the extension and the `data/` prefix
481 483 path_part = path.rsplit(b'.', 1)[0].split(b'/', 1)
482 484 if len(path_part) < 2:
483 485 msg = _(b"cannot recognize filelog from filename: '%s'")
484 486 msg %= path
485 487 raise error.Abort(msg)
486 488
487 489 return path_part[1]
488 490
489 491
def _filelog_from_filename(repo, path):
    """Returns the filelog for the given `path`. Stolen from `engine.py`

    The filelog is opened through the store vfs of `repo` (`repo.svfs`).
    """

    from .. import filelog  # avoid cycle

    return filelog.filelog(repo.svfs, path)
497 499
498 500
def _write_swapped_parents(repo, rl, rev, offset, fp):
    """Swaps p1 and p2 and overwrites the revlog entry for `rev` in `fp`

    The entry is read from `rl.index`, its fields 5 and 6 (the two parents)
    are exchanged, and the first eight fields are packed and written at
    `offset` in `fp`. The repository must be locked.
    """
    from ..pure import parsers  # avoid cycle

    if repo._currentlock(repo._lockref) is None:
        # Let's be paranoid about it
        raise error.ProgrammingError(
            "repo needs to be locked to rewrite parents"
        )

    index_format = parsers.IndexObject.index_format
    fields = list(rl.index[rev])
    fields[5], fields[6] = fields[6], fields[5]
    fp.seek(offset)
    fp.write(index_format.pack(*fields[:8]))
515 517
516 518
def _reorder_filelog_parents(repo, fl, to_fix):
    """
    Swaps p1 and p2 for all `to_fix` revisions of filelog `fl` and writes the
    new version to disk, overwriting the old one with a rename.

    The index is first copied to a temporary file next to it, the affected
    entries are patched in that copy, and the copy is then renamed over the
    original index before the revlog index is reloaded. The temporary file
    is removed if anything fails along the way.

    `to_fix` must be a non-empty collection of revision numbers. Only
    "version 1" revlogs are supported; anything else raises
    `error.ProgrammingError`.
    """
    from ..pure import parsers  # avoid cycle

    ui = repo.ui
    assert len(to_fix) > 0
    rl = fl._revlog
    if rl._format_version != constants.REVLOGV1:
        msg = "expected version 1 revlog, got version '%d'" % rl._format_version
        raise error.ProgrammingError(msg)

    index_file = rl._indexfile
    new_file_path = index_file + b'.tmp-parents-fix'
    # `new_file_path` is relative to the opener. The `util` helpers work on
    # plain filesystem paths, so they must be given the joined path; the
    # copy and the final cleanup have to target the very same file.
    new_file_fullpath = rl.opener.join(new_file_path)
    repaired_msg = _(b"repaired revision %d of 'filelog %s'\n")

    with ui.uninterruptible():
        try:
            util.copyfile(
                rl.opener.join(index_file),
                new_file_fullpath,
                checkambig=rl._checkambig,
            )

            with rl.opener(new_file_path, mode=b"r+") as fp:
                if rl._inline:
                    # entries are interleaved with data: their offsets have
                    # to be computed from the content of the file
                    index = parsers.InlinedIndexObject(fp.read())
                    wanted = set(to_fix)
                    for rev in fl.revs():
                        if rev in wanted:
                            offset = index._calculate_index(rev)
                            _write_swapped_parents(repo, rl, rev, offset, fp)
                            ui.write(repaired_msg % (rev, index_file))
                else:
                    # fixed size entries stored back to back
                    index_format = parsers.IndexObject.index_format
                    for rev in to_fix:
                        offset = rev * index_format.size
                        _write_swapped_parents(repo, rl, rev, offset, fp)
                        ui.write(repaired_msg % (rev, index_file))

            rl.opener.rename(new_file_path, index_file)
            rl.clearcaches()
            rl._loadindex()
        finally:
            # no-op after a successful rename, cleanup after a failure
            util.tryunlink(new_file_fullpath)
563 565
564 566
def _is_revision_affected(fl, filerev, metadata_cache=None):
    """Tell whether `filerev` of filelog `fl` is affected by issue6528

    Thin adapter: it hands lazy accessors for the raw text and the parent
    revisions of `filerev` to `_is_revision_affected_inner`.
    """
    rl = fl._revlog

    def full_text():
        return rl.rawdata(filerev)

    def parent_revs():
        return rl.parentrevs(filerev)

    return _is_revision_affected_inner(
        full_text, parent_revs, filerev, metadata_cache
    )
571 573
572 574
573 575 def _is_revision_affected_inner(
574 576 full_text,
575 577 parents_revs,
576 578 filerev,
577 579 metadata_cache=None,
578 580 ):
579 581 """Mercurial currently (5.9rc0) uses `p1 == nullrev and p2 != nullrev` as a
580 582 special meaning compared to the reverse in the context of filelog-based
581 583 copytracing. issue6528 exists because new code assumed that parent ordering
582 584 didn't matter, so this detects if the revision contains metadata (since
583 585 it's only used for filelog-based copytracing) and its parents are in the
584 586 "wrong" order."""
585 587 try:
586 588 raw_text = full_text()
587 589 except error.CensoredNodeError:
588 590 # We don't care about censored nodes as they never carry metadata
589 591 return False
590 592 has_meta = raw_text.startswith(b'\x01\n')
591 593 if metadata_cache is not None:
592 594 metadata_cache[filerev] = has_meta
593 595 if has_meta:
594 596 (p1, p2) = parents_revs()
595 597 if p1 != nullrev and p2 == nullrev:
596 598 return True
597 599 return False
598 600
599 601
def _is_revision_affected_fast(repo, fl, filerev, metadata_cache):
    """Fast variant of `_is_revision_affected` for filelog `fl`

    Thin adapter: it hands lazy accessors over the revlog of `fl` to
    `_is_revision_affected_fast_inner`. `repo` is not used here.
    """
    rl = fl._revlog

    def is_censored():
        return rl.iscensored(filerev)

    def delta_base():
        return rl.deltaparent(filerev)

    def delta():
        return rl._chunk(filerev)

    def full_text():
        return rl.rawdata(filerev)

    def parent_revs():
        return rl.parentrevs(filerev)

    return _is_revision_affected_fast_inner(
        is_censored,
        delta_base,
        delta,
        full_text,
        parent_revs,
        filerev,
        metadata_cache,
    )
616 618
617 619
618 620 def _is_revision_affected_fast_inner(
619 621 is_censored,
620 622 delta_base,
621 623 delta,
622 624 full_text,
623 625 parent_revs,
624 626 filerev,
625 627 metadata_cache,
626 628 ):
627 629 """Optimization fast-path for `_is_revision_affected`.
628 630
629 631 `metadata_cache` is a dict of `{rev: has_metadata}` which allows any
630 632 revision to check if its base has metadata, saving computation of the full
631 633 text, instead looking at the current delta.
632 634
633 635 This optimization only works if the revisions are looked at in order."""
634 636
635 637 if is_censored():
636 638 # Censored revisions don't contain metadata, so they cannot be affected
637 639 metadata_cache[filerev] = False
638 640 return False
639 641
640 642 p1, p2 = parent_revs()
641 643 if p1 == nullrev or p2 != nullrev:
642 644 return False
643 645
644 646 delta_parent = delta_base()
645 647 parent_has_metadata = metadata_cache.get(delta_parent)
646 648 if parent_has_metadata is None:
647 649 return _is_revision_affected_inner(
648 650 full_text,
649 651 parent_revs,
650 652 filerev,
651 653 metadata_cache,
652 654 )
653 655
654 656 chunk = delta()
655 657 if not len(chunk):
656 658 # No diff for this revision
657 659 return parent_has_metadata
658 660
659 661 header_length = 12
660 662 if len(chunk) < header_length:
661 663 raise error.Abort(_(b"patch cannot be decoded"))
662 664
663 665 start, _end, _length = struct.unpack(b">lll", chunk[:header_length])
664 666
665 667 if start < 2: # len(b'\x01\n') == 2
666 668 # This delta does *something* to the metadata marker (if any).
667 669 # Check it the slow way
668 670 is_affected = _is_revision_affected_inner(
669 671 full_text,
670 672 parent_revs,
671 673 filerev,
672 674 metadata_cache,
673 675 )
674 676 return is_affected
675 677
676 678 # The diff did not remove or add the metadata header, it's then in the same
677 679 # situation as its parent
678 680 metadata_cache[filerev] = parent_has_metadata
679 681 return parent_has_metadata
680 682
681 683
def _from_report(ui, repo, context, from_report, dry_run):
    """
    Repair the revisions listed in the `from_report` file.

    Each listed revision is checked again before being touched, so that a
    revision whose parents are already well-ordered is never swapped back
    (which would re-introduce the problem in a cyclic fashion).

    See the doc for `debug_fix_issue6528` for the format documentation.
    """
    ui.write(_(b"loading report file '%s'\n") % from_report)

    with context(), open(from_report, mode='rb') as report:
        entries = report.read().split(b'\n')
        for entry in entries:
            if not entry:
                continue
            # Each entry is "<hexnode>[,<hexnode>...] <filename>"
            hex_nodes, filename = entry.split(b' ', 1)
            fl = _filelog_from_filename(repo, filename)
            candidates = {
                fl.rev(binascii.unhexlify(hex_node))
                for hex_node in hex_nodes.split(b',')
            }

            # Keep only the candidates that are still affected
            confirmed = set()
            for filerev in candidates:
                if not _is_revision_affected(fl, filerev):
                    msg = _(b"revision %s of file '%s' is not affected\n")
                    msg %= (binascii.hexlify(fl.node(filerev)), filename)
                    ui.warn(msg)
                    continue
                msg = b"found affected revision %d for filelog '%s'\n"
                ui.warn(msg % (filerev, filename))
                confirmed.add(filerev)

            if not confirmed:
                msg = _(b"no affected revisions were found for '%s'\n")
                ui.write(msg % filename)
                continue
            if not dry_run:
                _reorder_filelog_parents(repo, fl, sorted(confirmed))
720 722
721 723
def filter_delta_issue6528(revlog, deltas_iter):
    """filter incoming deltas to repair issue 6528 on the fly

    `deltas_iter` yields 8-tuples of `(node, p1_node, p2_node, linknode,
    deltabase, delta, flags, sidedata)`.  Each tuple is yielded back
    unchanged, except for revisions detected as affected by issue6528, whose
    `p1_node` and `p2_node` are swapped.

    Raises `error.LookupError` if the delta base or one of the parents of an
    incoming delta is unknown to `revlog`.
    """
    metadata_cache = {}

    deltacomputer = deltas.deltacomputer(revlog)

    # Incoming revisions are numbered from the current length of the revlog.
    # NOTE(review): this assumes the caller adds each yielded delta to
    # `revlog` before requesting the next one -- confirm against the caller.
    for rev, d in enumerate(deltas_iter, len(revlog)):
        (
            node,
            p1_node,
            p2_node,
            linknode,
            deltabase,
            delta,
            flags,
            sidedata,
        ) = d

        if not revlog.index.has_node(deltabase):
            raise error.LookupError(
                deltabase, revlog.radix, _(b'unknown parent')
            )
        base_rev = revlog.rev(deltabase)
        if not revlog.index.has_node(p1_node):
            raise error.LookupError(p1_node, revlog.radix, _(b'unknown parent'))
        p1_rev = revlog.rev(p1_node)
        if not revlog.index.has_node(p2_node):
            raise error.LookupError(p2_node, revlog.radix, _(b'unknown parent'))
        p2_rev = revlog.rev(p2_node)

        # These closures read the loop variables of the current iteration;
        # they are only called by the check below, within this same iteration.
        is_censored = lambda: bool(flags & REVIDX_ISCENSORED)
        delta_base = lambda: base_rev
        parent_revs = lambda: (p1_rev, p2_rev)

        def full_text():
            # note: being able to reuse the full text computation in the
            # underlying addrevision would be useful however this is a bit too
            # intrusive for the "quick" issue6528 fix we are writing before the
            # 5.8 release
            textlen = mdiff.patchedsize(revlog.size(base_rev), delta)

            revinfo = revlogutils.revisioninfo(
                node,
                p1_node,
                p2_node,
                [None],
                textlen,
                (base_rev, delta),
                flags,
            )
            # cached by the global "writing" context
            assert revlog._writinghandles is not None
            if revlog._inline:
                fh = revlog._writinghandles[0]
            else:
                fh = revlog._writinghandles[1]
            return deltacomputer.buildtext(revinfo, fh)

        is_affected = _is_revision_affected_fast_inner(
            is_censored,
            delta_base,
            lambda: delta,
            full_text,
            parent_revs,
            rev,
            metadata_cache,
        )
        if is_affected:
            # Swap the parents of the affected revision
            d = (
                node,
                p2_node,
                p1_node,
                linknode,
                deltabase,
                delta,
                flags,
                sidedata,
            )
        yield d
804
805
def repair_issue6528(
    ui, repo, dry_run=False, to_report=None, from_report=None, paranoid=False
):
    """Find, and optionally repair, filelog revisions affected by issue6528.

    - `dry_run`: only report the affected revisions, change nothing
    - `to_report`: path of a file to write the affected revisions to (as hex
      nodes, one line per file) instead of repairing them
    - `from_report`: path of a report file to read the revisions to repair
      from; when set, the work is delegated to `_from_report`
    - `paranoid`: cross-check every fast-path answer with the slow check and
      abort on any disagreement

    The repository locks are only taken when the run may modify the
    repository, i.e. when neither `dry_run` nor `to_report` is set.
    """
    from .. import store  # avoid cycle

    @contextlib.contextmanager
    def context():
        if dry_run or to_report:  # No need for locking
            yield
        else:
            with repo.wlock(), repo.lock():
                yield

    if from_report:
        return _from_report(ui, repo, context, from_report, dry_run)

    report_entries = []

    with context():
        # Index files of filelogs only; no need to filter again in the loop
        index_paths = [
            path
            for (file_type, path, _e, _s) in repo.store.datafiles()
            if path.endswith(b'.i') and file_type & store.FILEFLAGS_FILELOG
        ]

        progress = ui.makeprogress(
            _(b"looking for affected revisions"),
            unit=_(b"filelogs"),
            total=len(index_paths),
        )
        found_nothing = True

        for path in index_paths:
            progress.increment()
            filename = _get_filename_from_filelog_index(path)
            fl = _filelog_from_filename(repo, filename)

            # Set of filerevs (or hex filenodes if `to_report`) that need fixing
            to_fix = set()
            metadata_cache = {}
            for filerev in fl.revs():
                affected = _is_revision_affected_fast(
                    repo, fl, filerev, metadata_cache
                )
                if paranoid:
                    slow = _is_revision_affected(fl, filerev)
                    if slow != affected:
                        msg = _(b"paranoid check failed for '%s' at node %s")
                        node = binascii.hexlify(fl.node(filerev))
                        raise error.Abort(msg % (filename, node))
                if affected:
                    msg = b"found affected revision %d for filelog '%s'\n"
                    ui.warn(msg % (filerev, path))
                    found_nothing = False
                    if not dry_run:
                        if to_report:
                            to_fix.add(binascii.hexlify(fl.node(filerev)))
                        else:
                            to_fix.add(filerev)

            if to_fix:
                to_fix = sorted(to_fix)
                if to_report:
                    report_entries.append((filename, to_fix))
                else:
                    _reorder_filelog_parents(repo, fl, to_fix)

        if found_nothing:
            ui.write(_(b"no affected revisions were found\n"))

        if to_report and report_entries:
            with open(to_report, mode="wb") as f:
                # One line per file: "<hexnode>[,<hexnode>...] <filename>"
                for report_filename, hex_nodes in report_entries:
                    f.write(b"%s %s\n" % (b",".join(hex_nodes), report_filename))

        progress.complete()
@@ -1,433 +1,526 b''
1 1 ===============================================================
2 2 Test non-regression on the corruption associated with issue6528
3 3 ===============================================================
4 4
5 5 Setup
6 6 =====
7 7
8 8 $ hg init base-repo
9 9 $ cd base-repo
10 10
11 11 $ cat <<EOF > a.txt
12 12 > 1
13 13 > 2
14 14 > 3
15 15 > 4
16 16 > 5
17 17 > 6
18 18 > EOF
19 19
20 20 $ hg add a.txt
21 21 $ hg commit -m 'c_base_c - create a.txt'
22 22
23 23 Modify a.txt
24 24
25 25 $ sed -e 's/1/foo/' a.txt > a.tmp; mv a.tmp a.txt
26 26 $ hg commit -m 'c_modify_c - modify a.txt'
27 27
28 28 Modify and rename a.txt to b.txt
29 29
30 30 $ hg up -r "desc('c_base_c')"
31 31 1 files updated, 0 files merged, 0 files removed, 0 files unresolved
32 32 $ sed -e 's/6/bar/' a.txt > a.tmp; mv a.tmp a.txt
33 33 $ hg mv a.txt b.txt
34 34 $ hg commit -m 'c_rename_c - rename and modify a.txt to b.txt'
35 35 created new head
36 36
37 37 Merge each branch
38 38
39 39 $ hg merge -r "desc('c_modify_c')"
40 40 merging b.txt and a.txt to b.txt
41 41 0 files updated, 1 files merged, 0 files removed, 0 files unresolved
42 42 (branch merge, don't forget to commit)
43 43 $ hg commit -m 'c_merge_c: commit merge'
44 44
45 45 $ hg debugrevlogindex b.txt
46 46 rev linkrev nodeid p1 p2
47 47 0 2 05b806ebe5ea 000000000000 000000000000
48 48 1 3 a58b36ad6b65 000000000000 05b806ebe5ea
49 49
50 50 Check commit Graph
51 51
52 52 $ hg log -G
53 53 @ changeset: 3:a1cc2bdca0aa
54 54 |\ tag: tip
55 55 | | parent: 2:615c6ccefd15
56 56 | | parent: 1:373d507f4667
57 57 | | user: test
58 58 | | date: Thu Jan 01 00:00:00 1970 +0000
59 59 | | summary: c_merge_c: commit merge
60 60 | |
61 61 | o changeset: 2:615c6ccefd15
62 62 | | parent: 0:f5a5a568022f
63 63 | | user: test
64 64 | | date: Thu Jan 01 00:00:00 1970 +0000
65 65 | | summary: c_rename_c - rename and modify a.txt to b.txt
66 66 | |
67 67 o | changeset: 1:373d507f4667
68 68 |/ user: test
69 69 | date: Thu Jan 01 00:00:00 1970 +0000
70 70 | summary: c_modify_c - modify a.txt
71 71 |
72 72 o changeset: 0:f5a5a568022f
73 73 user: test
74 74 date: Thu Jan 01 00:00:00 1970 +0000
75 75 summary: c_base_c - create a.txt
76 76
77 77
78 78 $ hg cat -r . b.txt
79 79 foo
80 80 2
81 81 3
82 82 4
83 83 5
84 84 bar
85 85 $ cat b.txt
86 86 foo
87 87 2
88 88 3
89 89 4
90 90 5
91 91 bar
92 92 $ cd ..
93 93
94 94
95 95 Check the lack of corruption
96 96 ============================
97 97
98 98 $ hg clone --pull base-repo cloned
99 99 requesting all changes
100 100 adding changesets
101 101 adding manifests
102 102 adding file changes
103 103 added 4 changesets with 4 changes to 2 files
104 104 new changesets f5a5a568022f:a1cc2bdca0aa
105 105 updating to branch default
106 106 1 files updated, 0 files merged, 0 files removed, 0 files unresolved
107 107 $ cd cloned
108 108 $ hg up -r "desc('c_merge_c')"
109 109 0 files updated, 0 files merged, 0 files removed, 0 files unresolved
110 110
111 111
112 112 Status is buggy, even with debugrebuilddirstate
113 113
114 114 $ hg cat -r . b.txt
115 115 foo
116 116 2
117 117 3
118 118 4
119 119 5
120 120 bar
121 121 $ cat b.txt
122 122 foo
123 123 2
124 124 3
125 125 4
126 126 5
127 127 bar
128 128 $ hg status
129 129 $ hg debugrebuilddirstate
130 130 $ hg status
131 131
132 132 the history was altered
133 133
134 134 in theory p1/p2 order does not matter but in practice p1 == nullid is used as a
135 135 marker that some metadata are present and should be fetched.
136 136
137 137 $ hg debugrevlogindex b.txt
138 138 rev linkrev nodeid p1 p2
139 139 0 2 05b806ebe5ea 000000000000 000000000000
140 140 1 3 a58b36ad6b65 000000000000 05b806ebe5ea
141 141
142 142 Check commit Graph
143 143
144 144 $ hg log -G
145 145 @ changeset: 3:a1cc2bdca0aa
146 146 |\ tag: tip
147 147 | | parent: 2:615c6ccefd15
148 148 | | parent: 1:373d507f4667
149 149 | | user: test
150 150 | | date: Thu Jan 01 00:00:00 1970 +0000
151 151 | | summary: c_merge_c: commit merge
152 152 | |
153 153 | o changeset: 2:615c6ccefd15
154 154 | | parent: 0:f5a5a568022f
155 155 | | user: test
156 156 | | date: Thu Jan 01 00:00:00 1970 +0000
157 157 | | summary: c_rename_c - rename and modify a.txt to b.txt
158 158 | |
159 159 o | changeset: 1:373d507f4667
160 160 |/ user: test
161 161 | date: Thu Jan 01 00:00:00 1970 +0000
162 162 | summary: c_modify_c - modify a.txt
163 163 |
164 164 o changeset: 0:f5a5a568022f
165 165 user: test
166 166 date: Thu Jan 01 00:00:00 1970 +0000
167 167 summary: c_base_c - create a.txt
168 168
169 169
170 170 Test the command that fixes the issue
171 171 =====================================
172 172
173 173 Restore a broken repository with multiple broken revisions and a filename that
174 174 would get encoded to test the `report` options.
175 175 It's a tarball because unbundle might magically fix the issue later.
176 176
177 177 $ cd ..
178 178 $ mkdir repo-to-fix
179 179 $ cd repo-to-fix
180 180 #if windows
181 181 tar interprets `:` in paths (like `C:`) as being remote, force local on Windows
182 182 only since some versions of tar don't have this flag.
183 183
184 184 $ tar --force-local -xf $TESTDIR/bundles/issue6528.tar
185 185 #else
186 186 $ tar xf $TESTDIR/bundles/issue6528.tar
187 187 #endif
188 188
189 189 Check that the issue is present
190 190 $ hg st
191 191 M D.txt
192 192 M b.txt
193 193 $ hg debugrevlogindex b.txt
194 194 rev linkrev nodeid p1 p2
195 195 0 2 05b806ebe5ea 000000000000 000000000000
196 196 1 3 a58b36ad6b65 05b806ebe5ea 000000000000
197 197 2 6 216a5fe8b8ed 000000000000 000000000000
198 198 3 7 ea4f2f2463cc 216a5fe8b8ed 000000000000
199 199 $ hg debugrevlogindex D.txt
200 200 rev linkrev nodeid p1 p2
201 201 0 6 2a8d3833f2fb 000000000000 000000000000
202 202 1 7 2a80419dfc31 2a8d3833f2fb 000000000000
203 203
204 204 Dry-run the fix
205 205 $ hg debug-repair-issue6528 --dry-run
206 206 found affected revision 1 for filelog 'data/D.txt.i'
207 207 found affected revision 1 for filelog 'data/b.txt.i'
208 208 found affected revision 3 for filelog 'data/b.txt.i'
209 209 $ hg st
210 210 M D.txt
211 211 M b.txt
212 212 $ hg debugrevlogindex b.txt
213 213 rev linkrev nodeid p1 p2
214 214 0 2 05b806ebe5ea 000000000000 000000000000
215 215 1 3 a58b36ad6b65 05b806ebe5ea 000000000000
216 216 2 6 216a5fe8b8ed 000000000000 000000000000
217 217 3 7 ea4f2f2463cc 216a5fe8b8ed 000000000000
218 218 $ hg debugrevlogindex D.txt
219 219 rev linkrev nodeid p1 p2
220 220 0 6 2a8d3833f2fb 000000000000 000000000000
221 221 1 7 2a80419dfc31 2a8d3833f2fb 000000000000
222 222
223 223 Test the --paranoid option
224 224 $ hg debug-repair-issue6528 --dry-run --paranoid
225 225 found affected revision 1 for filelog 'data/D.txt.i'
226 226 found affected revision 1 for filelog 'data/b.txt.i'
227 227 found affected revision 3 for filelog 'data/b.txt.i'
228 228 $ hg st
229 229 M D.txt
230 230 M b.txt
231 231 $ hg debugrevlogindex b.txt
232 232 rev linkrev nodeid p1 p2
233 233 0 2 05b806ebe5ea 000000000000 000000000000
234 234 1 3 a58b36ad6b65 05b806ebe5ea 000000000000
235 235 2 6 216a5fe8b8ed 000000000000 000000000000
236 236 3 7 ea4f2f2463cc 216a5fe8b8ed 000000000000
237 237 $ hg debugrevlogindex D.txt
238 238 rev linkrev nodeid p1 p2
239 239 0 6 2a8d3833f2fb 000000000000 000000000000
240 240 1 7 2a80419dfc31 2a8d3833f2fb 000000000000
241 241
242 242 Run the fix
243 243 $ hg debug-repair-issue6528
244 244 found affected revision 1 for filelog 'data/D.txt.i'
245 245 repaired revision 1 of 'filelog data/D.txt.i'
246 246 found affected revision 1 for filelog 'data/b.txt.i'
247 247 found affected revision 3 for filelog 'data/b.txt.i'
248 248 repaired revision 1 of 'filelog data/b.txt.i'
249 249 repaired revision 3 of 'filelog data/b.txt.i'
250 250
251 251 Check that the fix worked and that running it twice does nothing
252 252 $ hg st
253 253 $ hg debugrevlogindex b.txt
254 254 rev linkrev nodeid p1 p2
255 255 0 2 05b806ebe5ea 000000000000 000000000000
256 256 1 3 a58b36ad6b65 000000000000 05b806ebe5ea
257 257 2 6 216a5fe8b8ed 000000000000 000000000000
258 258 3 7 ea4f2f2463cc 000000000000 216a5fe8b8ed
259 259 $ hg debugrevlogindex D.txt
260 260 rev linkrev nodeid p1 p2
261 261 0 6 2a8d3833f2fb 000000000000 000000000000
262 262 1 7 2a80419dfc31 000000000000 2a8d3833f2fb
263 263 $ hg debug-repair-issue6528
264 264 no affected revisions were found
265 265 $ hg st
266 266 $ hg debugrevlogindex b.txt
267 267 rev linkrev nodeid p1 p2
268 268 0 2 05b806ebe5ea 000000000000 000000000000
269 269 1 3 a58b36ad6b65 000000000000 05b806ebe5ea
270 270 2 6 216a5fe8b8ed 000000000000 000000000000
271 271 3 7 ea4f2f2463cc 000000000000 216a5fe8b8ed
272 272 $ hg debugrevlogindex D.txt
273 273 rev linkrev nodeid p1 p2
274 274 0 6 2a8d3833f2fb 000000000000 000000000000
275 275 1 7 2a80419dfc31 000000000000 2a8d3833f2fb
276 276
277 277 Try using the report options
278 278 ----------------------------
279 279
280 280 $ cd ..
281 281 $ mkdir repo-to-fix-report
282 282 $ cd repo-to-fix
283 283 #if windows
284 284 tar interprets `:` in paths (like `C:`) as being remote, force local on Windows
285 285 only since some versions of tar don't have this flag.
286 286
287 287 $ tar --force-local -xf $TESTDIR/bundles/issue6528.tar
288 288 #else
289 289 $ tar xf $TESTDIR/bundles/issue6528.tar
290 290 #endif
291 291
292 292 $ hg debug-repair-issue6528 --to-report $TESTTMP/report.txt
293 293 found affected revision 1 for filelog 'data/D.txt.i'
294 294 found affected revision 1 for filelog 'data/b.txt.i'
295 295 found affected revision 3 for filelog 'data/b.txt.i'
296 296 $ cat $TESTTMP/report.txt
297 297 2a80419dfc31d7dfb308ac40f3f138282de7d73b D.txt
298 298 a58b36ad6b6545195952793099613c2116f3563b,ea4f2f2463cca5b29ddf3461012b8ce5c6dac175 b.txt
299 299
300 300 $ hg debug-repair-issue6528 --from-report $TESTTMP/report.txt --dry-run
301 301 loading report file '$TESTTMP/report.txt'
302 302 found affected revision 1 for filelog 'D.txt'
303 303 found affected revision 1 for filelog 'b.txt'
304 304 found affected revision 3 for filelog 'b.txt'
305 305 $ hg st
306 306 M D.txt
307 307 M b.txt
308 308 $ hg debugrevlogindex b.txt
309 309 rev linkrev nodeid p1 p2
310 310 0 2 05b806ebe5ea 000000000000 000000000000
311 311 1 3 a58b36ad6b65 05b806ebe5ea 000000000000
312 312 2 6 216a5fe8b8ed 000000000000 000000000000
313 313 3 7 ea4f2f2463cc 216a5fe8b8ed 000000000000
314 314 $ hg debugrevlogindex D.txt
315 315 rev linkrev nodeid p1 p2
316 316 0 6 2a8d3833f2fb 000000000000 000000000000
317 317 1 7 2a80419dfc31 2a8d3833f2fb 000000000000
318 318
319 319 $ hg debug-repair-issue6528 --from-report $TESTTMP/report.txt
320 320 loading report file '$TESTTMP/report.txt'
321 321 found affected revision 1 for filelog 'D.txt'
322 322 repaired revision 1 of 'filelog data/D.txt.i'
323 323 found affected revision 1 for filelog 'b.txt'
324 324 found affected revision 3 for filelog 'b.txt'
325 325 repaired revision 1 of 'filelog data/b.txt.i'
326 326 repaired revision 3 of 'filelog data/b.txt.i'
327 327 $ hg st
328 328 $ hg debugrevlogindex b.txt
329 329 rev linkrev nodeid p1 p2
330 330 0 2 05b806ebe5ea 000000000000 000000000000
331 331 1 3 a58b36ad6b65 000000000000 05b806ebe5ea
332 332 2 6 216a5fe8b8ed 000000000000 000000000000
333 333 3 7 ea4f2f2463cc 000000000000 216a5fe8b8ed
334 334 $ hg debugrevlogindex D.txt
335 335 rev linkrev nodeid p1 p2
336 336 0 6 2a8d3833f2fb 000000000000 000000000000
337 337 1 7 2a80419dfc31 000000000000 2a8d3833f2fb
338 338
339 339 Check that the revision is not "fixed" again
340 340
341 341 $ hg debug-repair-issue6528 --from-report $TESTTMP/report.txt
342 342 loading report file '$TESTTMP/report.txt'
343 343 revision 2a80419dfc31d7dfb308ac40f3f138282de7d73b of file 'D.txt' is not affected
344 344 no affected revisions were found for 'D.txt'
345 345 revision a58b36ad6b6545195952793099613c2116f3563b of file 'b.txt' is not affected
346 346 revision ea4f2f2463cca5b29ddf3461012b8ce5c6dac175 of file 'b.txt' is not affected
347 347 no affected revisions were found for 'b.txt'
348 348 $ hg st
349 349 $ hg debugrevlogindex b.txt
350 350 rev linkrev nodeid p1 p2
351 351 0 2 05b806ebe5ea 000000000000 000000000000
352 352 1 3 a58b36ad6b65 000000000000 05b806ebe5ea
353 353 2 6 216a5fe8b8ed 000000000000 000000000000
354 354 3 7 ea4f2f2463cc 000000000000 216a5fe8b8ed
355 355 $ hg debugrevlogindex D.txt
356 356 rev linkrev nodeid p1 p2
357 357 0 6 2a8d3833f2fb 000000000000 000000000000
358 358 1 7 2a80419dfc31 000000000000 2a8d3833f2fb
359 359
360 360 Try it with a non-inline revlog
361 361 -------------------------------
362 362
363 363 $ cd ..
364 364 $ mkdir $TESTTMP/ext
365 365 $ cat << EOF > $TESTTMP/ext/small_inline.py
366 366 > from mercurial import revlog
367 367 > revlog._maxinline = 8
368 368 > EOF
369 369
370 370 $ cat << EOF >> $HGRCPATH
371 371 > [extensions]
372 372 > small_inline=$TESTTMP/ext/small_inline.py
373 373 > EOF
374 374
375 375 $ mkdir repo-to-fix-not-inline
376 376 $ cd repo-to-fix-not-inline
377 377 #if windows
378 378 tar interprets `:` in paths (like `C:`) as being remote, force local on Windows
379 379 only since some versions of tar don't have this flag.
380 380
381 381 $ tar --force-local -xf $TESTDIR/bundles/issue6528.tar
382 382 #else
383 383 $ tar xf $TESTDIR/bundles/issue6528.tar
384 384 #endif
385 385 $ echo b >> b.txt
386 386 $ hg commit -qm "inline -> separate"
387 387 $ find .hg -name *b.txt.d
388 388 .hg/store/data/b.txt.d
389 389
390 390 Status is correct, but the problem is still there, in the earlier revision
391 391 $ hg st
392 392 $ hg up 3
393 393 1 files updated, 0 files merged, 1 files removed, 0 files unresolved
394 394 $ hg st
395 395 M b.txt
396 396 $ hg debugrevlogindex b.txt
397 397 rev linkrev nodeid p1 p2
398 398 0 2 05b806ebe5ea 000000000000 000000000000
399 399 1 3 a58b36ad6b65 05b806ebe5ea 000000000000
400 400 2 6 216a5fe8b8ed 000000000000 000000000000
401 401 3 7 ea4f2f2463cc 216a5fe8b8ed 000000000000
402 402 4 8 db234885e2fe ea4f2f2463cc 000000000000
403 403 $ hg debugrevlogindex D.txt
404 404 rev linkrev nodeid p1 p2
405 405 0 6 2a8d3833f2fb 000000000000 000000000000
406 406 1 7 2a80419dfc31 2a8d3833f2fb 000000000000
407 407 2 8 65aecc89bb5d 2a80419dfc31 000000000000
408 408
409 409 Run the fix on the non-inline revlog
410 410 $ hg debug-repair-issue6528
411 411 found affected revision 1 for filelog 'data/D.txt.i'
412 412 repaired revision 1 of 'filelog data/D.txt.i'
413 413 found affected revision 1 for filelog 'data/b.txt.i'
414 414 found affected revision 3 for filelog 'data/b.txt.i'
415 415 repaired revision 1 of 'filelog data/b.txt.i'
416 416 repaired revision 3 of 'filelog data/b.txt.i'
417 417
418 418 Check that it worked
419 419 $ hg debugrevlogindex b.txt
420 420 rev linkrev nodeid p1 p2
421 421 0 2 05b806ebe5ea 000000000000 000000000000
422 422 1 3 a58b36ad6b65 000000000000 05b806ebe5ea
423 423 2 6 216a5fe8b8ed 000000000000 000000000000
424 424 3 7 ea4f2f2463cc 000000000000 216a5fe8b8ed
425 425 4 8 db234885e2fe ea4f2f2463cc 000000000000
426 426 $ hg debugrevlogindex D.txt
427 427 rev linkrev nodeid p1 p2
428 428 0 6 2a8d3833f2fb 000000000000 000000000000
429 429 1 7 2a80419dfc31 000000000000 2a8d3833f2fb
430 430 2 8 65aecc89bb5d 2a80419dfc31 000000000000
431 431 $ hg debug-repair-issue6528
432 432 no affected revisions were found
433 433 $ hg st
434
435 $ cd ..
436
437 Applying a bad bundle should fix it on the fly
438 ----------------------------------------------
439
440 from a v1 bundle
441 ~~~~~~~~~~~~~~~~
442
443 $ hg debugbundle --spec "$TESTDIR"/bundles/issue6528.hg-v1
444 bzip2-v1
445
446 $ hg init unbundle-v1
447 $ cd unbundle-v1
448
449 $ hg unbundle "$TESTDIR"/bundles/issue6528.hg-v1
450 adding changesets
451 adding manifests
452 adding file changes
453 added 8 changesets with 12 changes to 4 files
454 new changesets f5a5a568022f:3beabb508514 (8 drafts)
455 (run 'hg update' to get a working copy)
456
457 Check that revisions were fixed on the fly
458
459 $ hg debugrevlogindex b.txt
460 rev linkrev nodeid p1 p2
461 0 2 05b806ebe5ea 000000000000 000000000000
462 1 3 a58b36ad6b65 000000000000 05b806ebe5ea
463 2 6 216a5fe8b8ed 000000000000 000000000000
464 3 7 ea4f2f2463cc 000000000000 216a5fe8b8ed
465
466 $ hg debugrevlogindex D.txt
467 rev linkrev nodeid p1 p2
468 0 6 2a8d3833f2fb 000000000000 000000000000
469 1 7 2a80419dfc31 000000000000 2a8d3833f2fb
470
471 That we don't see the symptoms of the bug
472
473 $ hg up -- -1
474 2 files updated, 0 files merged, 0 files removed, 0 files unresolved
475 $ hg status
476
477 And that the repair command does not find anything to fix
478
479 $ hg debug-repair-issue6528
480 no affected revisions were found
481
482 $ cd ..
483
484 from a v2 bundle
485 ~~~~~~~~~~~~~~~~
486
487 $ hg debugbundle --spec "$TESTDIR"/bundles/issue6528.hg-v2
488 bzip2-v2
489
490 $ hg init unbundle-v2
491 $ cd unbundle-v2
492
493 $ hg unbundle "$TESTDIR"/bundles/issue6528.hg-v2
494 adding changesets
495 adding manifests
496 adding file changes
497 added 8 changesets with 12 changes to 4 files
498 new changesets f5a5a568022f:3beabb508514 (8 drafts)
499 (run 'hg update' to get a working copy)
500
501 Check that revisions were fixed on the fly
502
503 $ hg debugrevlogindex b.txt
504 rev linkrev nodeid p1 p2
505 0 2 05b806ebe5ea 000000000000 000000000000
506 1 3 a58b36ad6b65 000000000000 05b806ebe5ea
507 2 6 216a5fe8b8ed 000000000000 000000000000
508 3 7 ea4f2f2463cc 000000000000 216a5fe8b8ed
509
510 $ hg debugrevlogindex D.txt
511 rev linkrev nodeid p1 p2
512 0 6 2a8d3833f2fb 000000000000 000000000000
513 1 7 2a80419dfc31 000000000000 2a8d3833f2fb
514
515 That we don't see the symptoms of the bug
516
517 $ hg up -- -1
518 2 files updated, 0 files merged, 0 files removed, 0 files unresolved
519 $ hg status
520
521 And that the repair command does not find anything to fix
522
523 $ hg debug-repair-issue6528
524 no affected revisions were found
525
526 $ cd ..
General Comments 0
You need to be logged in to leave comments. Login now