sidedata: replace sidedata upgrade mechanism with the new one...
Raphaël Gomès
r47847:27f1191b default
@@ -1,967 +1,922 @@
# coding: utf-8
# metadata.py -- code related to various metadata computation and access.
#
# Copyright 2019 Google, Inc <martinvonz@google.com>
# Copyright 2020 Pierre-Yves David <pierre-yves.david@octobus.net>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.
from __future__ import absolute_import, print_function

import multiprocessing
import struct

from .node import nullrev
from . import (
    error,
-    pycompat,
    requirements as requirementsmod,
    util,
)

from .revlogutils import (
    constants as revlogconst,
    flagutil as sidedataflag,
    sidedata as sidedatamod,
)


class ChangingFiles(object):
    """A class recording the changes made to files by a changeset

    Actions performed on files are gathered into 5 sets:

    - added: files actively added in the changeset.
    - merged: files whose history got merged
    - removed: files removed in the revision
    - salvaged: files that might have been deleted by a merge but were not
    - touched: files affected by the merge

    and copy information is held by 2 mappings

    - copied_from_p1: {"<new-name>": "<source-name-in-p1>"} mapping for copies
    - copied_from_p2: {"<new-name>": "<source-name-in-p2>"} mapping for copies

    See their inline help for details.
    """

    def __init__(
        self,
        touched=None,
        added=None,
        removed=None,
        merged=None,
        salvaged=None,
        p1_copies=None,
        p2_copies=None,
    ):
        self._added = set(() if added is None else added)
        self._merged = set(() if merged is None else merged)
        self._removed = set(() if removed is None else removed)
        self._touched = set(() if touched is None else touched)
        self._salvaged = set(() if salvaged is None else salvaged)
        self._touched.update(self._added)
        self._touched.update(self._merged)
        self._touched.update(self._removed)
        self._p1_copies = dict(() if p1_copies is None else p1_copies)
        self._p2_copies = dict(() if p2_copies is None else p2_copies)

    def __eq__(self, other):
        return (
            self.added == other.added
            and self.merged == other.merged
            and self.removed == other.removed
            and self.salvaged == other.salvaged
            and self.touched == other.touched
            and self.copied_from_p1 == other.copied_from_p1
            and self.copied_from_p2 == other.copied_from_p2
        )

    @property
    def has_copies_info(self):
        return bool(
            self.removed
            or self.merged
            or self.salvaged
            or self.copied_from_p1
            or self.copied_from_p2
        )

    @util.propertycache
    def added(self):
        """files actively added in the changeset

        Any file present in that revision that was absent in all the changeset's
        parents.

        In case of merge, this means a file absent in one of the parents but
        existing in the other will *not* be contained in this set. (They were
        added by an ancestor)
        """
        return frozenset(self._added)

    def mark_added(self, filename):
        if 'added' in vars(self):
            del self.added
        self._added.add(filename)
        self.mark_touched(filename)

    def update_added(self, filenames):
        for f in filenames:
            self.mark_added(f)

    @util.propertycache
    def merged(self):
        """files actively merged during a merge

        Any modified files which had modifications on both sides that needed
        merging.

        In this case a new filenode was created and it has two parents.
        """
        return frozenset(self._merged)

    def mark_merged(self, filename):
        if 'merged' in vars(self):
            del self.merged
        self._merged.add(filename)
        self.mark_touched(filename)

    def update_merged(self, filenames):
        for f in filenames:
            self.mark_merged(f)

    @util.propertycache
    def removed(self):
        """files actively removed by the changeset

        In case of merge this will only contain the set of files removing "new"
        content. For any file absent in the current changeset:

        a) If the file exists in both parents, it is clearly "actively" removed
        by this changeset.

        b) If a file exists in only one parent and in none of the common
        ancestors, then the file was newly added in one of the merged branches
        and then got "actively" removed.

        c) If a file exists in only one parent and at least one of the common
        ancestors using the same filenode, then the file was unchanged on one
        side and deleted on the other side. The merge "passively" propagated
        that deletion, but didn't "actively" remove the file. In this case the
        file is *not* included in the `removed` set.

        d) If a file exists in only one parent and at least one of the common
        ancestors using a different filenode, then the file was changed on one
        side and removed on the other side. The merge process "actively"
        decided to drop the new change and delete the file. Unlike in the
        previous case, (c), the file is included in the `removed` set.

        Summary table for merge:

        case | exists in parents | exists in gca || removed
         (a) |       both        |       *       ||   yes
         (b) |       one         |     none      ||   yes
         (c) |       one         | same filenode ||   no
         (d) |       one         | new filenode  ||   yes
        """
        return frozenset(self._removed)

    def mark_removed(self, filename):
        if 'removed' in vars(self):
            del self.removed
        self._removed.add(filename)
        self.mark_touched(filename)

    def update_removed(self, filenames):
        for f in filenames:
            self.mark_removed(f)

    @util.propertycache
    def salvaged(self):
        """files that might have been deleted by a merge, but still exist.

        During a merge, the manifest merging might select some files for
        removal, or for a removed/changed conflict. If at commit time the file
        still exists, its removal was "reverted" and the file is "salvaged"
        """
        return frozenset(self._salvaged)

    def mark_salvaged(self, filename):
        if "salvaged" in vars(self):
            del self.salvaged
        self._salvaged.add(filename)
        self.mark_touched(filename)

    def update_salvaged(self, filenames):
        for f in filenames:
            self.mark_salvaged(f)

    @util.propertycache
    def touched(self):
        """files either actively modified, added or removed"""
        return frozenset(self._touched)

    def mark_touched(self, filename):
        if 'touched' in vars(self):
            del self.touched
        self._touched.add(filename)

    def update_touched(self, filenames):
        for f in filenames:
            self.mark_touched(f)

    @util.propertycache
    def copied_from_p1(self):
        return self._p1_copies.copy()

    def mark_copied_from_p1(self, source, dest):
        if 'copied_from_p1' in vars(self):
            del self.copied_from_p1
        self._p1_copies[dest] = source

    def update_copies_from_p1(self, copies):
        for dest, source in copies.items():
            self.mark_copied_from_p1(source, dest)

    @util.propertycache
    def copied_from_p2(self):
        return self._p2_copies.copy()

    def mark_copied_from_p2(self, source, dest):
        if 'copied_from_p2' in vars(self):
            del self.copied_from_p2
        self._p2_copies[dest] = source

    def update_copies_from_p2(self, copies):
        for dest, source in copies.items():
            self.mark_copied_from_p2(source, dest)


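# A minimal illustration of the ChangingFiles API above (a sketch, with
# hypothetical filenames): marking a file with any specific action also
# updates the `touched` superset, and the copy mappings go from new name
# to old name.
cf = ChangingFiles()
cf.mark_added(b'new-name')
cf.mark_removed(b'old-name')
cf.mark_copied_from_p1(b'old-name', b'new-name')
assert cf.touched == {b'new-name', b'old-name'}
assert cf.copied_from_p1 == {b'new-name': b'old-name'}
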
def compute_all_files_changes(ctx):
    """compute the files changed by a revision"""
    p1 = ctx.p1()
    p2 = ctx.p2()
    if p1.rev() == nullrev and p2.rev() == nullrev:
        return _process_root(ctx)
    elif p1.rev() != nullrev and p2.rev() == nullrev:
        return _process_linear(p1, ctx)
    elif p1.rev() == nullrev and p2.rev() != nullrev:
        # In the wild, one can encounter changesets where p1 is null but p2 is not
        return _process_linear(p1, ctx, parent=2)
    elif p1.rev() == p2.rev():
        # In the wild, one can encounter such "non-merges"
        return _process_linear(p1, ctx)
    else:
        return _process_merge(p1, p2, ctx)


def _process_root(ctx):
    """compute the appropriate changed files for a changeset with no parents"""
    # Simple, there was nothing before it, so everything is added.
    md = ChangingFiles()
    manifest = ctx.manifest()
    for filename in manifest:
        md.mark_added(filename)
    return md


def _process_linear(parent_ctx, children_ctx, parent=1):
    """compute the appropriate changed files for a changeset with a single parent"""
    md = ChangingFiles()
    parent_manifest = parent_ctx.manifest()
    children_manifest = children_ctx.manifest()

    copies_candidate = []

    for filename, d in parent_manifest.diff(children_manifest).items():
        if d[1][0] is None:
            # no filenode for the "new" value, file is absent
            md.mark_removed(filename)
        else:
            copies_candidate.append(filename)
            if d[0][0] is None:
                # no filenode for the "old" value, file was absent
                md.mark_added(filename)
            else:
                # filenode for both "old" and "new"
                md.mark_touched(filename)

    if parent == 1:
        copied = md.mark_copied_from_p1
    elif parent == 2:
        copied = md.mark_copied_from_p2
    else:
        assert False, "bad parent value %d" % parent

    for filename in copies_candidate:
        copy_info = children_ctx[filename].renamed()
        if copy_info:
            source, srcnode = copy_info
            copied(source, filename)

    return md


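# For readers unfamiliar with manifest.diff(): the `d` values unpacked above
# (and in _process_merge below) are pairs of (filenode, flags) tuples, old
# side first, where a None filenode means the file is absent on that side.
# A sketch of the shape, with hypothetical values:
d_added = ((None, b''), (b'\x12\x34' * 10, b''))    # absent before, present now
d_removed = ((b'\x12\x34' * 10, b''), (None, b''))  # present before, absent now
assert d_added[0][0] is None and d_added[1][0] is not None
assert d_removed[1][0] is None
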
def _process_merge(p1_ctx, p2_ctx, ctx):
    """compute the appropriate changed files for a changeset with two parents

    This is a more advanced case. The information we need to record is
    summarized in the following table:

    ┌──────────────┬──────────────┬──────────────┬──────────────┬──────────────┐
    │ diff ╲ diff  │      ø       │ (Some, None) │ (None, Some) │ (Some, Some) │
    │  p2  ╲  p1   │              │              │              │              │
    ├──────────────┼──────────────┼──────────────┼──────────────┼──────────────┤
    │              │              │🄱 No Changes  │🄳 No Changes  │              │
    │      ø       │🄰 No Changes  │      OR      │      OR      │🄵 No Changes  │
    │              │              │🄲 Deleted[1]  │🄴 Salvaged[2] │     [3]      │
    ├──────────────┼──────────────┼──────────────┼──────────────┼──────────────┤
    │              │🄶 No Changes  │              │              │              │
    │ (Some, None) │      OR      │🄻 Deleted     │      ø       │      ø       │
    │              │🄷 Deleted[1]  │              │              │              │
    ├──────────────┼──────────────┼──────────────┼──────────────┼──────────────┤
    │              │🄸 No Changes  │              │              │ 🄽 Touched    │
    │ (None, Some) │      OR      │      ø       │🄼 Added       │OR 🅀 Salvaged │
    │              │🄹 Salvaged[2] │              │  (copied?)   │  (copied?)   │
    ├──────────────┼──────────────┼──────────────┼──────────────┼──────────────┤
    │              │              │              │ 🄾 Touched    │ 🄿 Merged     │
    │ (Some, Some) │🄺 No Changes  │      ø       │OR 🅁 Salvaged │OR 🅂 Touched  │
    │              │    [3]       │              │  (copied?)   │  (copied?)   │
    └──────────────┴──────────────┴──────────────┴──────────────┴──────────────┘

    Special case [1]:

        The situation is:
        - parent-A:     file exists,
        - parent-B:     no file,
        - working-copy: no file.

        Detecting a "deletion" will depend on the presence of actual change on
        the "parent-A" branch:

        Subcase 🄱 or 🄶 : if the state of the file in "parent-A" is unchanged
        compared to the merge ancestors, then the parent-A branch left the
        file untouched while parent-B deleted it. We simply apply the change
        from the "parent-B" branch; the file was automatically dropped.
        The result is:
        - file is not recorded as touched by the merge.

        Subcase 🄲 or 🄷 : otherwise, the changes from the parent-A branch were
        explicitly dropped and the file was "deleted again". From a user
        perspective, the message about "locally changed" while "remotely
        deleted" (or the other way around) was issued and the user chose to
        delete the file.
        The result:
        - file is recorded as touched by the merge.


    Special case [2]:

        The situation is:
        - parent-A:     no file,
        - parent-B:     file,
        - working-copy: file (same content as parent-B).

        There are three subcases depending on the ancestors' contents:

        - A) the file is missing in all ancestors,
        - B) at least one ancestor has the file with a filenode ≠ parent-B's,
        - C) all ancestors use the same filenode as parent-B.

        Subcase (A) is the simplest: nothing happened on the parent-A side
        while parent-B added it.

        The result:
        - the file is not marked as touched by the merge.

        Subcase (B) is the counterpart of "Special case [1]": the file was
        modified on the parent-B side, while the parent-A side deleted it.
        However this time, the conflict was solved by keeping the file (and
        its modification). We consider the file as "salvaged".

        The result:
        - the file is marked as "salvaged" by the merge.

        Subcase (C) is a subtle variation of the case above. In this case,
        the file is unchanged on the parent-B side and actively removed on
        the parent-A side. So the merge machinery correctly decides it should
        be removed. However, the file was explicitly restored to its parent-B
        content before the merge was committed. The file is marked as
        salvaged too. From the merge result perspective, this is similar to
        subcase (B); from the merge resolution perspective they differ, since
        in (C) there was a conflict with no obvious solution to the merge
        (that got reversed).

    Special case [3]:

        The situation is:
        - parent-A:     file,
        - parent-B:     file (different filenode than parent-A),
        - working-copy: file (same filenode as parent-B).

        This case is in theory much simpler: for this to happen, the filenode
        in parent-A has to be purely replaced by the one in parent-B (either
        a descendant, or a fully new file history, see changeset). So the
        merge introduces no changes, and the file is not affected by the
        merge...

        However, in the wild it is possible to find commits where the above
        does not hold. For example, repositories have some commits where the
        *new* node is an ancestor of the node in parent-A, or where parent-A
        and parent-B are two branches of the same file history, yet no
        merge-filenode was created (while the "merge" should have led to a
        "modification").

        Detecting such cases (and not recording the file as modified) would
        be a nice bonus. However, we do not do any of this yet.
    """

    repo = ctx.repo()
    md = ChangingFiles()

    m = ctx.manifest()
    p1m = p1_ctx.manifest()
    p2m = p2_ctx.manifest()
    diff_p1 = p1m.diff(m)
    diff_p2 = p2m.diff(m)

    cahs = ctx.repo().changelog.commonancestorsheads(
        p1_ctx.node(), p2_ctx.node()
    )
    if not cahs:
        cahs = [nullrev]
    mas = [ctx.repo()[r].manifest() for r in cahs]

    copy_candidates = []

    # Dealing with case 🄰 happens automatically. Since there are no entries
    # in d1 nor d2, we will never iterate over them.

    # Iteration over d1 content will deal with all cases, but the ones in the
    # first column of the table.
    for filename, d1 in diff_p1.items():

        d2 = diff_p2.pop(filename, None)

        if d2 is None:
            # this deals with the first line of the table.
            _process_other_unchanged(md, mas, filename, d1)
        else:

            if d1[0][0] is None and d2[0][0] is None:
                # case 🄼 — the file was absent from both parents: added.
                md.mark_added(filename)
                copy_candidates.append(filename)
            elif d1[1][0] is None and d2[1][0] is None:
                # case 🄻 — both deleted the file.
                md.mark_removed(filename)
            elif d1[1][0] is not None and d2[1][0] is not None:
                if d1[0][0] is None or d2[0][0] is None:
                    if any(_find(ma, filename) is not None for ma in mas):
                        # case 🅀 or 🅁
                        md.mark_salvaged(filename)
                    else:
                        # case 🄽 🄾 : touched
                        md.mark_touched(filename)
                else:
                    fctx = repo.filectx(filename, fileid=d1[1][0])
                    if fctx.p2().rev() == nullrev:
                        # case 🅂
                        # let's assume we can trust the file history. If the
                        # filenode is not a merge, the file was not merged.
                        md.mark_touched(filename)
                    else:
                        # case 🄿
                        md.mark_merged(filename)
                        copy_candidates.append(filename)
            else:
                # Impossible case, the post-merge file status cannot be None
                # on one side and Something on the other side.
                assert False, "unreachable"

    # Iteration over remaining d2 content deals with the first column of the
    # table.
    for filename, d2 in diff_p2.items():
        _process_other_unchanged(md, mas, filename, d2)

    for filename in copy_candidates:
        copy_info = ctx[filename].renamed()
        if copy_info:
            source, srcnode = copy_info
            if source in p1_ctx and p1_ctx[source].filenode() == srcnode:
                md.mark_copied_from_p1(source, filename)
            elif source in p2_ctx and p2_ctx[source].filenode() == srcnode:
                md.mark_copied_from_p2(source, filename)
    return md


def _find(manifest, filename):
    """return the associated filenode or None"""
    if filename not in manifest:
        return None
    return manifest.find(filename)[0]


def _process_other_unchanged(md, mas, filename, diff):
    source_node = diff[0][0]
    target_node = diff[1][0]

    if source_node is not None and target_node is None:
        if any(not _find(ma, filename) == source_node for ma in mas):
            # case 🄲 or 🄷
            md.mark_removed(filename)
        # else, we have case 🄱 or 🄶 : no change needs to be recorded
    elif source_node is None and target_node is not None:
        if any(_find(ma, filename) is not None for ma in mas):
            # case 🄴 or 🄹
            md.mark_salvaged(filename)
        # else, we have case 🄳 or 🄸 : simple merge without intervention
    elif source_node is not None and target_node is not None:
        # case 🄵 or 🄺 : simple merge without intervention
        #
        # In the buggy case where source_node is not an ancestor of
        # target_node, there should have been a new filenode created,
        # recording this as "modified". We do not deal with them yet.
        pass
    else:
        # An impossible case: the diff algorithm should not return an entry
        # if the file is missing on both sides.
        assert False, "unreachable"


def _missing_from_all_ancestors(mas, filename):
    return all(_find(ma, filename) is None for ma in mas)


def computechangesetfilesadded(ctx):
    """return the list of files added in a changeset"""
    added = []
    for f in ctx.files():
        if not any(f in p for p in ctx.parents()):
            added.append(f)
    return added


def get_removal_filter(ctx, x=None):
    """return a function to detect files "wrongly" detected as `removed`

    When a file is removed relative to p1 in a merge, this
    function determines whether the absence is due to a
    deletion from a parent, or whether the merge commit
    itself deletes the file. We decide this by doing a
    simplified three way merge of the manifest entry for
    the file. There are two ways we decide the merge
    itself didn't delete a file:
    - neither parent (nor the merge) contains the file
    - exactly one parent contains the file, and that
      parent has the same filelog entry as the merge
      ancestor (or all of them if there are two). In other
      words, that parent left the file unchanged while the
      other one deleted it.
    One way to think about this is that deleting a file is
    similar to emptying it, so the list of changed files
    should be similar either way. The computation
    described above is not done directly in _filecommit
    when creating the list of changed files, however
    it does something very similar by comparing filelog
    nodes.
    """

    if x is not None:
        p1, p2, m1, m2 = x
    else:
        p1 = ctx.p1()
        p2 = ctx.p2()
        m1 = p1.manifest()
        m2 = p2.manifest()

    @util.cachefunc
    def mas():
        p1n = p1.node()
        p2n = p2.node()
        cahs = ctx.repo().changelog.commonancestorsheads(p1n, p2n)
        if not cahs:
            cahs = [nullrev]
        return [ctx.repo()[r].manifest() for r in cahs]

    def deletionfromparent(f):
        if f in m1:
            return f not in m2 and all(
                f in ma and ma.find(f) == m1.find(f) for ma in mas()
            )
        elif f in m2:
            return all(f in ma and ma.find(f) == m2.find(f) for ma in mas())
        else:
            return True

    return deletionfromparent


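# The decision rule above, restated as a sketch with hypothetical manifest
# stand-ins (plain dicts mapping filename to a (filenode, flags) pair): a
# file missing from the merge counts as a "deletion from a parent" when the
# surviving parent's entry matches all merge ancestors.
m1 = {b'f': (b'node-A', b'')}
m2 = {}
ancestors = [{b'f': (b'node-A', b'')}]
f = b'f'
deletion_from_parent = f in m1 and f not in m2 and all(
    f in ma and ma[f] == m1[f] for ma in ancestors
)
assert deletion_from_parent  # p1 left b'f' untouched; p2 deleted it
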
def computechangesetfilesremoved(ctx):
    """return the list of files removed in a changeset"""
    removed = []
    for f in ctx.files():
        if f not in ctx:
            removed.append(f)
    if removed:
        rf = get_removal_filter(ctx)
        removed = [r for r in removed if not rf(r)]
    return removed


def computechangesetfilesmerged(ctx):
    """return the list of files merged in a changeset"""
    merged = []
    if len(ctx.parents()) < 2:
        return merged
    for f in ctx.files():
        if f in ctx:
            fctx = ctx[f]
            parents = fctx._filelog.parents(fctx._filenode)
            if parents[1] != ctx.repo().nullid:
                merged.append(f)
    return merged


def computechangesetcopies(ctx):
    """return the copies data for a changeset

    The copies data are returned as a pair of dictionaries (p1copies, p2copies).

    Each dictionary is of the form: `{newname: oldname}`
    """
    p1copies = {}
    p2copies = {}
    p1 = ctx.p1()
    p2 = ctx.p2()
    narrowmatch = ctx._repo.narrowmatch()
    for dst in ctx.files():
        if not narrowmatch(dst) or dst not in ctx:
            continue
        copied = ctx[dst].renamed()
        if not copied:
            continue
        src, srcnode = copied
        if src in p1 and p1[src].filenode() == srcnode:
            p1copies[dst] = src
        elif src in p2 and p2[src].filenode() == srcnode:
            p2copies[dst] = src
    return p1copies, p2copies


def encodecopies(files, copies):
    items = []
    for i, dst in enumerate(files):
        if dst in copies:
            items.append(b'%d\0%s' % (i, copies[dst]))
    if len(items) != len(copies):
        raise error.ProgrammingError(
            b'some copy targets missing from file list'
        )
    return b"\n".join(items)


def decodecopies(files, data):
    try:
        copies = {}
        if not data:
            return copies
        for l in data.split(b'\n'):
            strindex, src = l.split(b'\0')
            i = int(strindex)
            dst = files[i]
            copies[dst] = src
        return copies
    except (ValueError, IndexError):
        # Perhaps someone had chosen the same key name (e.g. "p1copies") and
        # used different syntax for the value.
        return None


def encodefileindices(files, subset):
    subset = set(subset)
    indices = []
    for i, f in enumerate(files):
        if f in subset:
            indices.append(b'%d' % i)
    return b'\n'.join(indices)


def decodefileindices(files, data):
    try:
        subset = []
        if not data:
            return subset
        for strindex in data.split(b'\n'):
            i = int(strindex)
            if i < 0 or i >= len(files):
                return None
            subset.append(files[i])
        return subset
    except (ValueError, IndexError):
        # Perhaps someone had chosen the same key name (e.g. "added") and
        # used different syntax for the value.
        return None


# see mercurial/helptext/internals/revlogs.txt for details about the format

ACTION_MASK = int("111" "00", 2)
# note: an untouched file used as a copy source will show as `000` for this
# mask.
ADDED_FLAG = int("001" "00", 2)
MERGED_FLAG = int("010" "00", 2)
REMOVED_FLAG = int("011" "00", 2)
SALVAGED_FLAG = int("100" "00", 2)
TOUCHED_FLAG = int("101" "00", 2)

COPIED_MASK = int("11", 2)
COPIED_FROM_P1_FLAG = int("10", 2)
COPIED_FROM_P2_FLAG = int("11", 2)

# structure is <flag><filename-end><copy-source>
INDEX_HEADER = struct.Struct(">L")
INDEX_ENTRY = struct.Struct(">bLL")


def encode_files_sidedata(files):
    all_files = set(files.touched)
    all_files.update(files.copied_from_p1.values())
    all_files.update(files.copied_from_p2.values())
    all_files = sorted(all_files)
    file_idx = {f: i for (i, f) in enumerate(all_files)}
    file_idx[None] = 0

    chunks = [INDEX_HEADER.pack(len(all_files))]

    filename_length = 0
    for f in all_files:
        filename_size = len(f)
        filename_length += filename_size
        flag = 0
        if f in files.added:
            flag |= ADDED_FLAG
        elif f in files.merged:
            flag |= MERGED_FLAG
        elif f in files.removed:
            flag |= REMOVED_FLAG
        elif f in files.salvaged:
            flag |= SALVAGED_FLAG
        elif f in files.touched:
            flag |= TOUCHED_FLAG

        copy = None
        if f in files.copied_from_p1:
            flag |= COPIED_FROM_P1_FLAG
            copy = files.copied_from_p1.get(f)
        elif f in files.copied_from_p2:
            copy = files.copied_from_p2.get(f)
            flag |= COPIED_FROM_P2_FLAG
        copy_idx = file_idx[copy]
        chunks.append(INDEX_ENTRY.pack(flag, filename_length, copy_idx))
    chunks.extend(all_files)
    return {sidedatamod.SD_FILES: b''.join(chunks)}


def decode_files_sidedata(sidedata):
    md = ChangingFiles()
    raw = sidedata.get(sidedatamod.SD_FILES)

    if raw is None:
        return md

    copies = []
    all_files = []

    assert len(raw) >= INDEX_HEADER.size
    total_files = INDEX_HEADER.unpack_from(raw, 0)[0]

    offset = INDEX_HEADER.size
    file_offset_base = offset + (INDEX_ENTRY.size * total_files)
    file_offset_last = file_offset_base

    assert len(raw) >= file_offset_base

    for idx in range(total_files):
        flag, file_end, copy_idx = INDEX_ENTRY.unpack_from(raw, offset)
        file_end += file_offset_base
        filename = raw[file_offset_last:file_end]
        filesize = file_end - file_offset_last
        assert len(filename) == filesize
        offset += INDEX_ENTRY.size
        file_offset_last = file_end
        all_files.append(filename)
        if flag & ACTION_MASK == ADDED_FLAG:
            md.mark_added(filename)
        elif flag & ACTION_MASK == MERGED_FLAG:
            md.mark_merged(filename)
        elif flag & ACTION_MASK == REMOVED_FLAG:
            md.mark_removed(filename)
        elif flag & ACTION_MASK == SALVAGED_FLAG:
            md.mark_salvaged(filename)
        elif flag & ACTION_MASK == TOUCHED_FLAG:
            md.mark_touched(filename)

        copied = None
        if flag & COPIED_MASK == COPIED_FROM_P1_FLAG:
            copied = md.mark_copied_from_p1
        elif flag & COPIED_MASK == COPIED_FROM_P2_FLAG:
            copied = md.mark_copied_from_p2

        if copied is not None:
            copies.append((copied, filename, copy_idx))

    for copied, filename, copy_idx in copies:
        copied(all_files[copy_idx], filename)

    return md


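# A small self-contained sketch of the binary layout used above (the struct
# formats and the added-file flag are reproduced locally so the snippet runs
# on its own; the filename is hypothetical): encode one added file with no
# copy source, then decode it back.
import struct

HDR = struct.Struct(">L")    # number of files
ENT = struct.Struct(">bLL")  # flag, filename-end offset, copy index
ADDED = int("001" "00", 2)

names = [b'foo.txt']
parts = [HDR.pack(len(names))]
end = 0
for name in names:
    end += len(name)
    parts.append(ENT.pack(ADDED, end, 0))
parts.extend(names)
raw = b''.join(parts)

count = HDR.unpack_from(raw, 0)[0]
base = HDR.size + ENT.size * count  # filenames start after the entries
flag, name_end, copy_idx = ENT.unpack_from(raw, HDR.size)
assert raw[base:base + name_end] == b'foo.txt'
assert flag == ADDED and copy_idx == 0
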
def _getsidedata(srcrepo, rev):
    ctx = srcrepo[rev]
    files = compute_all_files_changes(ctx)
    return encode_files_sidedata(files), files.has_copies_info


def copies_sidedata_computer(repo, revlog, rev, existing_sidedata):
    sidedata, has_copies_info = _getsidedata(repo, rev)
    flags_to_add = sidedataflag.REVIDX_HASCOPIESINFO if has_copies_info else 0
    return sidedata, (flags_to_add, 0)
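
# Under the new mechanism this commit switches to, a sidedata computer
# follows the signature of copies_sidedata_computer above: it takes
# (repo, revlog, rev, existing_sidedata) and returns the sidedata map plus a
# (flags_to_add, flags_to_remove) pair. A minimal sketch of a no-op computer
# (hypothetical, for illustration only):
def noop_sidedata_computer(repo, revlog, rev, existing_sidedata):
    # keep whatever sidedata is already there, touch no flags
    return existing_sidedata or {}, (0, 0)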


def set_sidedata_spec_for_repo(repo):
    if requirementsmod.COPIESSDC_REQUIREMENT in repo.requirements:
        repo.register_wanted_sidedata(sidedatamod.SD_FILES)
    repo.register_sidedata_computer(
        revlogconst.KIND_CHANGELOG,
        sidedatamod.SD_FILES,
        (sidedatamod.SD_FILES,),
        copies_sidedata_computer,
        sidedataflag.REVIDX_HASCOPIESINFO,
    )


-def getsidedataadder(srcrepo, destrepo):
-    use_w = srcrepo.ui.configbool(b'experimental', b'worker.repository-upgrade')
-    if pycompat.iswindows or not use_w:
-        return _get_simple_sidedata_adder(srcrepo, destrepo)
-    else:
-        return _get_worker_sidedata_adder(srcrepo, destrepo)
-
-
def _sidedata_worker(srcrepo, revs_queue, sidedata_queue, tokens):
    """The function used by workers precomputing sidedata

    It reads an input queue containing revision numbers and writes to an
    output queue containing (rev, <sidedata-map>) pairs.

    The `None` input value is used as a stop signal.

    The `tokens` semaphore is used to avoid having too many unprocessed
    entries. Each worker needs to acquire one token before fetching a task.
    Tokens are released by the consumer of the produced data.
    """
    tokens.acquire()
    rev = revs_queue.get()
    while rev is not None:
        data = _getsidedata(srcrepo, rev)
        sidedata_queue.put((rev, data))
        tokens.acquire()
        rev = revs_queue.get()
    # processing of `None` is completed, release the token.
    tokens.release()


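# The token scheme above bounds how far producers may run ahead of the
# consumer. A self-contained sketch of the same backpressure pattern, using
# threads instead of processes for brevity (an illustration under that
# assumption, not part of the commit):
import queue
import threading

tokens = threading.BoundedSemaphore(2)  # at most 2 unconsumed results
tasks = queue.Queue()
results = queue.Queue()

def producer():
    tokens.acquire()
    item = tasks.get()
    while item is not None:
        results.put((item, item * item))  # stand-in for _getsidedata()
        tokens.acquire()
        item = tasks.get()
    tokens.release()  # give back the token taken before the stop signal

for i in range(5):
    tasks.put(i)
tasks.put(None)  # stop signal, as in _sidedata_worker
t = threading.Thread(target=producer)
t.start()
for _ in range(5):
    rev, data = results.get()
    tokens.release()  # the consumer frees a slot, letting the producer continue
t.join()
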
871 BUFF_PER_WORKER = 50
862 BUFF_PER_WORKER = 50
872
863
873
864
874 def _get_worker_sidedata_adder(srcrepo, destrepo):
865 def _get_worker_sidedata_adder(srcrepo, destrepo):
875 """The parallel version of the sidedata computation
866 """The parallel version of the sidedata computation
876
867
877 This code spawn a pool of worker that precompute a buffer of sidedata
868 This code spawn a pool of worker that precompute a buffer of sidedata
878 before we actually need them"""
869 before we actually need them"""
879 # avoid circular import copies -> scmutil -> worker -> copies
870 # avoid circular import copies -> scmutil -> worker -> copies
880 from . import worker
871 from . import worker
881
872
882 nbworkers = worker._numworkers(srcrepo.ui)
873 nbworkers = worker._numworkers(srcrepo.ui)
883
874
884 tokens = multiprocessing.BoundedSemaphore(nbworkers * BUFF_PER_WORKER)
875 tokens = multiprocessing.BoundedSemaphore(nbworkers * BUFF_PER_WORKER)
885 revsq = multiprocessing.Queue()
876 revsq = multiprocessing.Queue()
886 sidedataq = multiprocessing.Queue()
877 sidedataq = multiprocessing.Queue()
887
878
888 assert srcrepo.filtername is None
879 assert srcrepo.filtername is None
889 # queue all tasks beforehand, revision numbers are small and it makes
880 # queue all tasks beforehand, revision numbers are small and it makes
890 # synchronisation simpler
881 # synchronisation simpler
891 #
882 #
892 # Since the computation for each node can be quite expensive, the overhead
883 # Since the computation for each node can be quite expensive, the overhead
893 # of using a single queue is not relevant. In practice, most computations
884 # of using a single queue is not relevant. In practice, most computations
894 # are fast, but some are very expensive and dominate all the other smaller
885 # are fast, but some are very expensive and dominate all the other smaller
895 # costs.
886 # costs.
896 for r in srcrepo.changelog.revs():
887 for r in srcrepo.changelog.revs():
897 revsq.put(r)
888 revsq.put(r)
898 # queue the "no more tasks" markers
889 # queue the "no more tasks" markers
899 for i in range(nbworkers):
890 for i in range(nbworkers):
900 revsq.put(None)
891 revsq.put(None)
901
892
902 allworkers = []
893 allworkers = []
903 for i in range(nbworkers):
894 for i in range(nbworkers):
904 args = (srcrepo, revsq, sidedataq, tokens)
895 args = (srcrepo, revsq, sidedataq, tokens)
905 w = multiprocessing.Process(target=_sidedata_worker, args=args)
896 w = multiprocessing.Process(target=_sidedata_worker, args=args)
906 allworkers.append(w)
897 allworkers.append(w)
907 w.start()
898 w.start()
908
899
909 # dictionary to store results for revisions higher than the one we are
900 # dictionary to store results for revisions higher than the one we are
910 # looking for. For example, if we need the sidedatamap for 42, and 43 is
901 # looking for. For example, if we need the sidedatamap for 42, and 43 is
911 # received, we shelve 43 for later use.
902 # received, we shelve 43 for later use.
912 staging = {}
903 staging = {}
913
904
914 def sidedata_companion(revlog, rev):
915 data = {}, False
916 if util.safehasattr(revlog, b'filteredrevs'): # this is a changelog
917 # Is the data previously shelved ?
918 data = staging.pop(rev, None)
919 if data is None:
920 # look at the queued results until we find the one we are looking
921 # for (shelve the other ones)
922 r, data = sidedataq.get()
923 while r != rev:
924 staging[r] = data
925 r, data = sidedataq.get()
926 tokens.release()
927 sidedata, has_copies_info = data
928 new_flag = 0
929 if has_copies_info:
930 new_flag = sidedataflag.REVIDX_HASCOPIESINFO
931 return False, (), sidedata, new_flag, 0
905 def sidedata_companion(repo, revlog, rev, old_sidedata):
906 # Is the data previously shelved ?
907 data = staging.pop(rev, None)
908 if data is None:
909 # look at the queued results until we find the one we are looking
910 # for (shelve the other ones)
911 r, data = sidedataq.get()
912 while r != rev:
913 staging[r] = data
914 r, data = sidedataq.get()
915 tokens.release()
916 sidedata, has_copies_info = data
917 new_flag = 0
918 if has_copies_info:
919 new_flag = sidedataflag.REVIDX_HASCOPIESINFO
920 return sidedata, (new_flag, 0)
932
921
933 return sidedata_companion
922 return sidedata_companion
934
935
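The shelving dance above can be shown in isolation (a hypothetical single-process sketch, not part of the patch): results may arrive out of order, so anything that is not the revision currently wanted is parked in a staging dict until its turn comes.

import queue

def drain_in_order(resultq, wanted_revs):
    staging = {}  # rev -> data that arrived ahead of its turn
    for rev in wanted_revs:
        data = staging.pop(rev, None)
        if data is None:
            r, data = resultq.get()
            while r != rev:  # not ours yet: shelve it and keep reading
                staging[r] = data
                r, data = resultq.get()
        yield rev, data

q = queue.Queue()
for item in [(2, 'c'), (0, 'a'), (1, 'b')]:  # out-of-order arrivals
    q.put(item)
assert list(drain_in_order(q, [0, 1, 2])) == [(0, 'a'), (1, 'b'), (2, 'c')]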
936 def _get_simple_sidedata_adder(srcrepo, destrepo):
937 """The simple version of the sidedata computation
938
939 It just computes it in the same thread on request"""
940
941 def sidedatacompanion(revlog, rev):
942 sidedata, has_copies_info = {}, False
943 if util.safehasattr(revlog, 'filteredrevs'): # this is a changelog
944 sidedata, has_copies_info = _getsidedata(srcrepo, rev)
945 new_flag = 0
946 if has_copies_info:
947 new_flag = sidedataflag.REVIDX_HASCOPIESINFO
948
949 return False, (), sidedata, new_flag, 0
950
951 return sidedatacompanion
952
953
954 def getsidedataremover(srcrepo, destrepo):
955 def sidedatacompanion(revlog, rev):
956 f = ()
957 if util.safehasattr(revlog, 'filteredrevs'): # this is a changelog
958 if revlog.flags(rev) & sidedataflag.REVIDX_SIDEDATA:
959 f = (
960 sidedatamod.SD_P1COPIES,
961 sidedatamod.SD_P2COPIES,
962 sidedatamod.SD_FILESADDED,
963 sidedatamod.SD_FILESREMOVED,
964 )
965 return False, f, {}, 0, sidedataflag.REVIDX_HASCOPIESINFO
966
967 return sidedatacompanion
@@ -1,3145 +1,3129 b''
1 # revlog.py - storage back-end for mercurial
1 # revlog.py - storage back-end for mercurial
2 #
2 #
3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 """Storage back-end for Mercurial.
8 """Storage back-end for Mercurial.
9
9
10 This provides efficient delta storage with O(1) retrieve and append
10 This provides efficient delta storage with O(1) retrieve and append
11 and O(changes) merge between branches.
11 and O(changes) merge between branches.
12 """
12 """
13
13
14 from __future__ import absolute_import
14 from __future__ import absolute_import
15
15
16 import binascii
16 import binascii
17 import collections
17 import collections
18 import contextlib
18 import contextlib
19 import errno
19 import errno
20 import io
20 import io
21 import os
21 import os
22 import struct
22 import struct
23 import zlib
23 import zlib
24
24
25 # import stuff from node for others to import from revlog
25 # import stuff from node for others to import from revlog
26 from .node import (
26 from .node import (
27 bin,
27 bin,
28 hex,
28 hex,
29 nullrev,
29 nullrev,
30 sha1nodeconstants,
30 sha1nodeconstants,
31 short,
31 short,
32 wdirrev,
32 wdirrev,
33 )
33 )
34 from .i18n import _
34 from .i18n import _
35 from .pycompat import getattr
35 from .pycompat import getattr
36 from .revlogutils.constants import (
36 from .revlogutils.constants import (
37 ALL_KINDS,
37 ALL_KINDS,
38 FLAG_GENERALDELTA,
38 FLAG_GENERALDELTA,
39 FLAG_INLINE_DATA,
39 FLAG_INLINE_DATA,
40 INDEX_HEADER,
40 INDEX_HEADER,
41 REVLOGV0,
41 REVLOGV0,
42 REVLOGV1,
42 REVLOGV1,
43 REVLOGV1_FLAGS,
43 REVLOGV1_FLAGS,
44 REVLOGV2,
44 REVLOGV2,
45 REVLOGV2_FLAGS,
45 REVLOGV2_FLAGS,
46 REVLOG_DEFAULT_FLAGS,
46 REVLOG_DEFAULT_FLAGS,
47 REVLOG_DEFAULT_FORMAT,
47 REVLOG_DEFAULT_FORMAT,
48 REVLOG_DEFAULT_VERSION,
48 REVLOG_DEFAULT_VERSION,
49 )
49 )
50 from .revlogutils.flagutil import (
50 from .revlogutils.flagutil import (
51 REVIDX_DEFAULT_FLAGS,
51 REVIDX_DEFAULT_FLAGS,
52 REVIDX_ELLIPSIS,
52 REVIDX_ELLIPSIS,
53 REVIDX_EXTSTORED,
53 REVIDX_EXTSTORED,
54 REVIDX_FLAGS_ORDER,
54 REVIDX_FLAGS_ORDER,
55 REVIDX_HASCOPIESINFO,
55 REVIDX_HASCOPIESINFO,
56 REVIDX_ISCENSORED,
56 REVIDX_ISCENSORED,
57 REVIDX_RAWTEXT_CHANGING_FLAGS,
57 REVIDX_RAWTEXT_CHANGING_FLAGS,
58 )
58 )
59 from .thirdparty import attr
59 from .thirdparty import attr
60 from . import (
60 from . import (
61 ancestor,
61 ancestor,
62 dagop,
62 dagop,
63 error,
63 error,
64 mdiff,
64 mdiff,
65 policy,
65 policy,
66 pycompat,
66 pycompat,
67 templatefilters,
67 templatefilters,
68 util,
68 util,
69 )
69 )
70 from .interfaces import (
70 from .interfaces import (
71 repository,
71 repository,
72 util as interfaceutil,
72 util as interfaceutil,
73 )
73 )
74 from .revlogutils import (
74 from .revlogutils import (
75 deltas as deltautil,
75 deltas as deltautil,
76 flagutil,
76 flagutil,
77 nodemap as nodemaputil,
77 nodemap as nodemaputil,
78 revlogv0,
78 revlogv0,
79 sidedata as sidedatautil,
79 sidedata as sidedatautil,
80 )
80 )
81 from .utils import (
81 from .utils import (
82 storageutil,
82 storageutil,
83 stringutil,
83 stringutil,
84 )
84 )
85
85
86 # blanked usage of all the names to prevent pyflakes complaints
86 # blanked usage of all the names to prevent pyflakes complaints
87 # We need these names available in the module for extensions.
87 # We need these names available in the module for extensions.
88
88 REVLOGV0
89 REVLOGV0
89 REVLOGV1
90 REVLOGV1
90 REVLOGV2
91 REVLOGV2
91 FLAG_INLINE_DATA
92 FLAG_INLINE_DATA
92 FLAG_GENERALDELTA
93 FLAG_GENERALDELTA
93 REVLOG_DEFAULT_FLAGS
94 REVLOG_DEFAULT_FLAGS
94 REVLOG_DEFAULT_FORMAT
95 REVLOG_DEFAULT_FORMAT
95 REVLOG_DEFAULT_VERSION
96 REVLOG_DEFAULT_VERSION
96 REVLOGV1_FLAGS
97 REVLOGV1_FLAGS
97 REVLOGV2_FLAGS
98 REVLOGV2_FLAGS
98 REVIDX_ISCENSORED
99 REVIDX_ISCENSORED
99 REVIDX_ELLIPSIS
100 REVIDX_ELLIPSIS
100 REVIDX_HASCOPIESINFO
101 REVIDX_HASCOPIESINFO
101 REVIDX_EXTSTORED
102 REVIDX_EXTSTORED
102 REVIDX_DEFAULT_FLAGS
103 REVIDX_DEFAULT_FLAGS
103 REVIDX_FLAGS_ORDER
104 REVIDX_FLAGS_ORDER
104 REVIDX_RAWTEXT_CHANGING_FLAGS
105 REVIDX_RAWTEXT_CHANGING_FLAGS
105
106
106 parsers = policy.importmod('parsers')
107 parsers = policy.importmod('parsers')
107 rustancestor = policy.importrust('ancestor')
108 rustancestor = policy.importrust('ancestor')
108 rustdagop = policy.importrust('dagop')
109 rustdagop = policy.importrust('dagop')
109 rustrevlog = policy.importrust('revlog')
110 rustrevlog = policy.importrust('revlog')
110
111
111 # Aliased for performance.
112 # Aliased for performance.
112 _zlibdecompress = zlib.decompress
113 _zlibdecompress = zlib.decompress
113
114
114 # max size of revlog with inline data
115 # max size of revlog with inline data
115 _maxinline = 131072
116 _maxinline = 131072
116 _chunksize = 1048576
117 _chunksize = 1048576
117
118
118 # Flag processors for REVIDX_ELLIPSIS.
119 # Flag processors for REVIDX_ELLIPSIS.
119 def ellipsisreadprocessor(rl, text):
120 def ellipsisreadprocessor(rl, text):
120 return text, False
121 return text, False
121
122
122
123
123 def ellipsiswriteprocessor(rl, text):
124 def ellipsiswriteprocessor(rl, text):
124 return text, False
125 return text, False
125
126
126
127
127 def ellipsisrawprocessor(rl, text):
128 def ellipsisrawprocessor(rl, text):
128 return False
129 return False
129
130
130
131
131 ellipsisprocessor = (
132 ellipsisprocessor = (
132 ellipsisreadprocessor,
133 ellipsisreadprocessor,
133 ellipsiswriteprocessor,
134 ellipsiswriteprocessor,
134 ellipsisrawprocessor,
135 ellipsisrawprocessor,
135 )
136 )
136
137
137
138
138 def offset_type(offset, type):
139 def offset_type(offset, type):
139 if (type & ~flagutil.REVIDX_KNOWN_FLAGS) != 0:
140 if (type & ~flagutil.REVIDX_KNOWN_FLAGS) != 0:
140 raise ValueError(b'unknown revlog index flags')
141 raise ValueError(b'unknown revlog index flags')
141 return int(int(offset) << 16 | type)
142 return int(int(offset) << 16 | type)
142
143
143
144
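A quick worked example of the packing above (editorial illustration, not in the original file; assumes it runs in this module's namespace): the byte offset occupies the high bits and the 16 flag bits the low ones, which is exactly what start() and flags() unpack from index entries further down.

packed = offset_type(1024, REVIDX_ISCENSORED)
assert packed == (1024 << 16) | REVIDX_ISCENSORED
assert packed >> 16 == 1024                    # what start() recovers
assert packed & 0xFFFF == REVIDX_ISCENSORED    # what flags() recovers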
144 def _verify_revision(rl, skipflags, state, node):
145 def _verify_revision(rl, skipflags, state, node):
145 """Verify the integrity of the given revlog ``node`` while providing a hook
146 """Verify the integrity of the given revlog ``node`` while providing a hook
146 point for extensions to influence the operation."""
147 point for extensions to influence the operation."""
147 if skipflags:
148 if skipflags:
148 state[b'skipread'].add(node)
149 state[b'skipread'].add(node)
149 else:
150 else:
150 # Side-effect: read content and verify hash.
151 # Side-effect: read content and verify hash.
151 rl.revision(node)
152 rl.revision(node)
152
153
153
154
154 # True if a fast implementation for persistent-nodemap is available
155 # True if a fast implementation for persistent-nodemap is available
155 #
156 #
156 # We also consider we have a "fast" implementation in "pure" python because
157 # We also consider we have a "fast" implementation in "pure" python because
157 # people using pure don't really have performance considerations (and a
158 # people using pure don't really have performance considerations (and a
158 # wheelbarrow of other slowness sources)
159 # wheelbarrow of other slowness sources)
159 HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or util.safehasattr(
160 HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or util.safehasattr(
160 parsers, 'BaseIndexObject'
161 parsers, 'BaseIndexObject'
161 )
162 )
162
163
163
164
164 @attr.s(slots=True, frozen=True)
165 @attr.s(slots=True, frozen=True)
165 class _revisioninfo(object):
166 class _revisioninfo(object):
166 """Information about a revision that allows building its fulltext
167 """Information about a revision that allows building its fulltext
167 node: expected hash of the revision
168 node: expected hash of the revision
168 p1, p2: parent revs of the revision
169 p1, p2: parent revs of the revision
169 btext: built text cache consisting of a one-element list
170 btext: built text cache consisting of a one-element list
170 cachedelta: (baserev, uncompressed_delta) or None
171 cachedelta: (baserev, uncompressed_delta) or None
171 flags: flags associated with the revision storage
172 flags: flags associated with the revision storage
172
173
173 One of btext[0] or cachedelta must be set.
174 One of btext[0] or cachedelta must be set.
174 """
175 """
175
176
176 node = attr.ib()
177 node = attr.ib()
177 p1 = attr.ib()
178 p1 = attr.ib()
178 p2 = attr.ib()
179 p2 = attr.ib()
179 btext = attr.ib()
180 btext = attr.ib()
180 textlen = attr.ib()
181 textlen = attr.ib()
181 cachedelta = attr.ib()
182 cachedelta = attr.ib()
182 flags = attr.ib()
183 flags = attr.ib()
183
184
184
185
185 @interfaceutil.implementer(repository.irevisiondelta)
186 @interfaceutil.implementer(repository.irevisiondelta)
186 @attr.s(slots=True)
187 @attr.s(slots=True)
187 class revlogrevisiondelta(object):
188 class revlogrevisiondelta(object):
188 node = attr.ib()
189 node = attr.ib()
189 p1node = attr.ib()
190 p1node = attr.ib()
190 p2node = attr.ib()
191 p2node = attr.ib()
191 basenode = attr.ib()
192 basenode = attr.ib()
192 flags = attr.ib()
193 flags = attr.ib()
193 baserevisionsize = attr.ib()
194 baserevisionsize = attr.ib()
194 revision = attr.ib()
195 revision = attr.ib()
195 delta = attr.ib()
196 delta = attr.ib()
196 sidedata = attr.ib()
197 sidedata = attr.ib()
197 protocol_flags = attr.ib()
198 protocol_flags = attr.ib()
198 linknode = attr.ib(default=None)
199 linknode = attr.ib(default=None)
199
200
200
201
201 @interfaceutil.implementer(repository.iverifyproblem)
202 @interfaceutil.implementer(repository.iverifyproblem)
202 @attr.s(frozen=True)
203 @attr.s(frozen=True)
203 class revlogproblem(object):
204 class revlogproblem(object):
204 warning = attr.ib(default=None)
205 warning = attr.ib(default=None)
205 error = attr.ib(default=None)
206 error = attr.ib(default=None)
206 node = attr.ib(default=None)
207 node = attr.ib(default=None)
207
208
208
209
209 def parse_index_v1(data, inline):
210 def parse_index_v1(data, inline):
210 # call the C implementation to parse the index data
211 # call the C implementation to parse the index data
211 index, cache = parsers.parse_index2(data, inline)
212 index, cache = parsers.parse_index2(data, inline)
212 return index, cache
213 return index, cache
213
214
214
215
215 def parse_index_v2(data, inline):
216 def parse_index_v2(data, inline):
216 # call the C implementation to parse the index data
217 # call the C implementation to parse the index data
217 index, cache = parsers.parse_index2(data, inline, revlogv2=True)
218 index, cache = parsers.parse_index2(data, inline, revlogv2=True)
218 return index, cache
219 return index, cache
219
220
220
221
221 if util.safehasattr(parsers, 'parse_index_devel_nodemap'):
222 if util.safehasattr(parsers, 'parse_index_devel_nodemap'):
222
223
223 def parse_index_v1_nodemap(data, inline):
224 def parse_index_v1_nodemap(data, inline):
224 index, cache = parsers.parse_index_devel_nodemap(data, inline)
225 index, cache = parsers.parse_index_devel_nodemap(data, inline)
225 return index, cache
226 return index, cache
226
227
227
228
228 else:
229 else:
229 parse_index_v1_nodemap = None
230 parse_index_v1_nodemap = None
230
231
231
232
232 def parse_index_v1_mixed(data, inline):
233 def parse_index_v1_mixed(data, inline):
233 index, cache = parse_index_v1(data, inline)
234 index, cache = parse_index_v1(data, inline)
234 return rustrevlog.MixedIndex(index), cache
235 return rustrevlog.MixedIndex(index), cache
235
236
236
237
237 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
238 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
238 # signed integer)
239 # signed integer)
239 _maxentrysize = 0x7FFFFFFF
240 _maxentrysize = 0x7FFFFFFF
240
241
241
242
242 class revlog(object):
243 class revlog(object):
243 """
244 """
244 the underlying revision storage object
245 the underlying revision storage object
245
246
246 A revlog consists of two parts, an index and the revision data.
247 A revlog consists of two parts, an index and the revision data.
247
248
248 The index is a file with a fixed record size containing
249 The index is a file with a fixed record size containing
249 information on each revision, including its nodeid (hash), the
250 information on each revision, including its nodeid (hash), the
250 nodeids of its parents, the position and offset of its data within
251 nodeids of its parents, the position and offset of its data within
251 the data file, and the revision it's based on. Finally, each entry
252 the data file, and the revision it's based on. Finally, each entry
252 contains a linkrev entry that can serve as a pointer to external
253 contains a linkrev entry that can serve as a pointer to external
253 data.
254 data.
254
255
255 The revision data itself is a linear collection of data chunks.
256 The revision data itself is a linear collection of data chunks.
256 Each chunk represents a revision and is usually represented as a
257 Each chunk represents a revision and is usually represented as a
257 delta against the previous chunk. To bound lookup time, runs of
258 delta against the previous chunk. To bound lookup time, runs of
258 deltas are limited to about 2 times the length of the original
259 deltas are limited to about 2 times the length of the original
259 version data. This makes retrieval of a version proportional to
260 version data. This makes retrieval of a version proportional to
260 its size, or O(1) relative to the number of revisions.
261 its size, or O(1) relative to the number of revisions.
261
262
262 Both pieces of the revlog are written to in an append-only
263 Both pieces of the revlog are written to in an append-only
263 fashion, which means we never need to rewrite a file to insert or
264 fashion, which means we never need to rewrite a file to insert or
264 remove data, and can use some simple techniques to avoid the need
265 remove data, and can use some simple techniques to avoid the need
265 for locking while reading.
266 for locking while reading.
266
267
267 If checkambig, indexfile is opened with checkambig=True at
268 If checkambig, indexfile is opened with checkambig=True at
268 writing, to avoid file stat ambiguity.
269 writing, to avoid file stat ambiguity.
269
270
270 If mmaplargeindex is True, and an mmapindexthreshold is set, the
271 If mmaplargeindex is True, and an mmapindexthreshold is set, the
271 index will be mmapped rather than read if it is larger than the
272 index will be mmapped rather than read if it is larger than the
272 configured threshold.
273 configured threshold.
273
274
274 If censorable is True, the revlog can have censored revisions.
275 If censorable is True, the revlog can have censored revisions.
275
276
276 If `upperboundcomp` is not None, this is the expected maximal gain from
277 If `upperboundcomp` is not None, this is the expected maximal gain from
277 compression for the data content.
278 compression for the data content.
278
279
279 `concurrencychecker` is an optional function that receives 3 arguments: a
280 `concurrencychecker` is an optional function that receives 3 arguments: a
280 file handle, a filename, and an expected position. It should check whether
281 file handle, a filename, and an expected position. It should check whether
281 the current position in the file handle is valid, and log/warn/fail (by
282 the current position in the file handle is valid, and log/warn/fail (by
282 raising).
283 raising).
283 """
284 """
284
285
285 _flagserrorclass = error.RevlogError
286 _flagserrorclass = error.RevlogError
286
287
287 def __init__(
288 def __init__(
288 self,
289 self,
289 opener,
290 opener,
290 target,
291 target,
291 indexfile=None,
292 indexfile=None,
292 datafile=None,
293 datafile=None,
293 checkambig=False,
294 checkambig=False,
294 mmaplargeindex=False,
295 mmaplargeindex=False,
295 censorable=False,
296 censorable=False,
296 upperboundcomp=None,
297 upperboundcomp=None,
297 persistentnodemap=False,
298 persistentnodemap=False,
298 concurrencychecker=None,
299 concurrencychecker=None,
299 ):
300 ):
300 """
301 """
301 create a revlog object
302 create a revlog object
302
303
303 opener is a function that abstracts the file opening operation
304 opener is a function that abstracts the file opening operation
304 and can be used to implement COW semantics or the like.
305 and can be used to implement COW semantics or the like.
305
306
306 `target`: a (KIND, ID) tuple that identifies the content stored in
307 `target`: a (KIND, ID) tuple that identifies the content stored in
307 this revlog. It helps the rest of the code understand what the revlog
308 this revlog. It helps the rest of the code understand what the revlog
308 is about without having to resort to heuristics and index filename
309 is about without having to resort to heuristics and index filename
309 analysis. Note that this must be reliably set by normal code, but
310 analysis. Note that this must be reliably set by normal code, but
310 test, debug, or performance measurement code might not set this to an
311 test, debug, or performance measurement code might not set this to an
311 accurate value.
312 accurate value.
312 """
313 """
313 self.upperboundcomp = upperboundcomp
314 self.upperboundcomp = upperboundcomp
314 self.indexfile = indexfile
315 self.indexfile = indexfile
315 self.datafile = datafile or (indexfile[:-2] + b".d")
316 self.datafile = datafile or (indexfile[:-2] + b".d")
316 self.nodemap_file = None
317 self.nodemap_file = None
317 if persistentnodemap:
318 if persistentnodemap:
318 self.nodemap_file = nodemaputil.get_nodemap_file(
319 self.nodemap_file = nodemaputil.get_nodemap_file(
319 opener, self.indexfile
320 opener, self.indexfile
320 )
321 )
321
322
322 self.opener = opener
323 self.opener = opener
323 assert target[0] in ALL_KINDS
324 assert target[0] in ALL_KINDS
324 assert len(target) == 2
325 assert len(target) == 2
325 self.target = target
326 self.target = target
326 # When True, indexfile is opened with checkambig=True at writing, to
327 # When True, indexfile is opened with checkambig=True at writing, to
327 # avoid file stat ambiguity.
328 # avoid file stat ambiguity.
328 self._checkambig = checkambig
329 self._checkambig = checkambig
329 self._mmaplargeindex = mmaplargeindex
330 self._mmaplargeindex = mmaplargeindex
330 self._censorable = censorable
331 self._censorable = censorable
331 # 3-tuple of (node, rev, text) for a raw revision.
332 # 3-tuple of (node, rev, text) for a raw revision.
332 self._revisioncache = None
333 self._revisioncache = None
333 # Maps rev to chain base rev.
334 # Maps rev to chain base rev.
334 self._chainbasecache = util.lrucachedict(100)
335 self._chainbasecache = util.lrucachedict(100)
335 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
336 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
336 self._chunkcache = (0, b'')
337 self._chunkcache = (0, b'')
337 # How much data to read and cache into the raw revlog data cache.
338 # How much data to read and cache into the raw revlog data cache.
338 self._chunkcachesize = 65536
339 self._chunkcachesize = 65536
339 self._maxchainlen = None
340 self._maxchainlen = None
340 self._deltabothparents = True
341 self._deltabothparents = True
341 self.index = None
342 self.index = None
342 self._nodemap_docket = None
343 self._nodemap_docket = None
343 # Mapping of partial identifiers to full nodes.
344 # Mapping of partial identifiers to full nodes.
344 self._pcache = {}
345 self._pcache = {}
345 # Mapping of revision integer to full node.
346 # Mapping of revision integer to full node.
346 self._compengine = b'zlib'
347 self._compengine = b'zlib'
347 self._compengineopts = {}
348 self._compengineopts = {}
348 self._maxdeltachainspan = -1
349 self._maxdeltachainspan = -1
349 self._withsparseread = False
350 self._withsparseread = False
350 self._sparserevlog = False
351 self._sparserevlog = False
351 self._srdensitythreshold = 0.50
352 self._srdensitythreshold = 0.50
352 self._srmingapsize = 262144
353 self._srmingapsize = 262144
353
354
354 # Make copy of flag processors so each revlog instance can support
355 # Make copy of flag processors so each revlog instance can support
355 # custom flags.
356 # custom flags.
356 self._flagprocessors = dict(flagutil.flagprocessors)
357 self._flagprocessors = dict(flagutil.flagprocessors)
357
358
358 # 2-tuple of file handles being used for active writing.
359 # 2-tuple of file handles being used for active writing.
359 self._writinghandles = None
360 self._writinghandles = None
360
361
361 self._loadindex()
362 self._loadindex()
362
363
363 self._concurrencychecker = concurrencychecker
364 self._concurrencychecker = concurrencychecker
364
365
365 def _loadindex(self):
366 def _loadindex(self):
366 mmapindexthreshold = None
367 mmapindexthreshold = None
367 opts = self.opener.options
368 opts = self.opener.options
368
369
369 if b'revlogv2' in opts:
370 if b'revlogv2' in opts:
370 newversionflags = REVLOGV2 | FLAG_INLINE_DATA
371 newversionflags = REVLOGV2 | FLAG_INLINE_DATA
371 elif b'revlogv1' in opts:
372 elif b'revlogv1' in opts:
372 newversionflags = REVLOGV1 | FLAG_INLINE_DATA
373 newversionflags = REVLOGV1 | FLAG_INLINE_DATA
373 if b'generaldelta' in opts:
374 if b'generaldelta' in opts:
374 newversionflags |= FLAG_GENERALDELTA
375 newversionflags |= FLAG_GENERALDELTA
375 elif b'revlogv0' in self.opener.options:
376 elif b'revlogv0' in self.opener.options:
376 newversionflags = REVLOGV0
377 newversionflags = REVLOGV0
377 else:
378 else:
378 newversionflags = REVLOG_DEFAULT_VERSION
379 newversionflags = REVLOG_DEFAULT_VERSION
379
380
380 if b'chunkcachesize' in opts:
381 if b'chunkcachesize' in opts:
381 self._chunkcachesize = opts[b'chunkcachesize']
382 self._chunkcachesize = opts[b'chunkcachesize']
382 if b'maxchainlen' in opts:
383 if b'maxchainlen' in opts:
383 self._maxchainlen = opts[b'maxchainlen']
384 self._maxchainlen = opts[b'maxchainlen']
384 if b'deltabothparents' in opts:
385 if b'deltabothparents' in opts:
385 self._deltabothparents = opts[b'deltabothparents']
386 self._deltabothparents = opts[b'deltabothparents']
386 self._lazydelta = bool(opts.get(b'lazydelta', True))
387 self._lazydelta = bool(opts.get(b'lazydelta', True))
387 self._lazydeltabase = False
388 self._lazydeltabase = False
388 if self._lazydelta:
389 if self._lazydelta:
389 self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
390 self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
390 if b'compengine' in opts:
391 if b'compengine' in opts:
391 self._compengine = opts[b'compengine']
392 self._compengine = opts[b'compengine']
392 if b'zlib.level' in opts:
393 if b'zlib.level' in opts:
393 self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
394 self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
394 if b'zstd.level' in opts:
395 if b'zstd.level' in opts:
395 self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
396 self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
396 if b'maxdeltachainspan' in opts:
397 if b'maxdeltachainspan' in opts:
397 self._maxdeltachainspan = opts[b'maxdeltachainspan']
398 self._maxdeltachainspan = opts[b'maxdeltachainspan']
398 if self._mmaplargeindex and b'mmapindexthreshold' in opts:
399 if self._mmaplargeindex and b'mmapindexthreshold' in opts:
399 mmapindexthreshold = opts[b'mmapindexthreshold']
400 mmapindexthreshold = opts[b'mmapindexthreshold']
400 self.hassidedata = bool(opts.get(b'side-data', False))
401 self.hassidedata = bool(opts.get(b'side-data', False))
401 self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
402 self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
402 withsparseread = bool(opts.get(b'with-sparse-read', False))
403 withsparseread = bool(opts.get(b'with-sparse-read', False))
403 # sparse-revlog forces sparse-read
404 # sparse-revlog forces sparse-read
404 self._withsparseread = self._sparserevlog or withsparseread
405 self._withsparseread = self._sparserevlog or withsparseread
405 if b'sparse-read-density-threshold' in opts:
406 if b'sparse-read-density-threshold' in opts:
406 self._srdensitythreshold = opts[b'sparse-read-density-threshold']
407 self._srdensitythreshold = opts[b'sparse-read-density-threshold']
407 if b'sparse-read-min-gap-size' in opts:
408 if b'sparse-read-min-gap-size' in opts:
408 self._srmingapsize = opts[b'sparse-read-min-gap-size']
409 self._srmingapsize = opts[b'sparse-read-min-gap-size']
409 if opts.get(b'enableellipsis'):
410 if opts.get(b'enableellipsis'):
410 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
411 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
411
412
412 # revlog v0 doesn't have flag processors
413 # revlog v0 doesn't have flag processors
413 for flag, processor in pycompat.iteritems(
414 for flag, processor in pycompat.iteritems(
414 opts.get(b'flagprocessors', {})
415 opts.get(b'flagprocessors', {})
415 ):
416 ):
416 flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
417 flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
417
418
418 if self._chunkcachesize <= 0:
419 if self._chunkcachesize <= 0:
419 raise error.RevlogError(
420 raise error.RevlogError(
420 _(b'revlog chunk cache size %r is not greater than 0')
421 _(b'revlog chunk cache size %r is not greater than 0')
421 % self._chunkcachesize
422 % self._chunkcachesize
422 )
423 )
423 elif self._chunkcachesize & (self._chunkcachesize - 1):
424 elif self._chunkcachesize & (self._chunkcachesize - 1):
424 raise error.RevlogError(
425 raise error.RevlogError(
425 _(b'revlog chunk cache size %r is not a power of 2')
426 _(b'revlog chunk cache size %r is not a power of 2')
426 % self._chunkcachesize
427 % self._chunkcachesize
427 )
428 )
428
429
429 indexdata = b''
430 indexdata = b''
430 self._initempty = True
431 self._initempty = True
431 try:
432 try:
432 with self._indexfp() as f:
433 with self._indexfp() as f:
433 if (
434 if (
434 mmapindexthreshold is not None
435 mmapindexthreshold is not None
435 and self.opener.fstat(f).st_size >= mmapindexthreshold
436 and self.opener.fstat(f).st_size >= mmapindexthreshold
436 ):
437 ):
437 # TODO: should .close() to release resources without
438 # TODO: should .close() to release resources without
438 # relying on Python GC
439 # relying on Python GC
439 indexdata = util.buffer(util.mmapread(f))
440 indexdata = util.buffer(util.mmapread(f))
440 else:
441 else:
441 indexdata = f.read()
442 indexdata = f.read()
442 if len(indexdata) > 0:
443 if len(indexdata) > 0:
443 versionflags = INDEX_HEADER.unpack(indexdata[:4])[0]
444 versionflags = INDEX_HEADER.unpack(indexdata[:4])[0]
444 self._initempty = False
445 self._initempty = False
445 else:
446 else:
446 versionflags = newversionflags
447 versionflags = newversionflags
447 except IOError as inst:
448 except IOError as inst:
448 if inst.errno != errno.ENOENT:
449 if inst.errno != errno.ENOENT:
449 raise
450 raise
450
451
451 versionflags = newversionflags
452 versionflags = newversionflags
452
453
453 self.version = versionflags
454 self.version = versionflags
454
455
455 flags = versionflags & ~0xFFFF
456 flags = versionflags & ~0xFFFF
456 fmt = versionflags & 0xFFFF
457 fmt = versionflags & 0xFFFF
457
458
458 if fmt == REVLOGV0:
459 if fmt == REVLOGV0:
459 if flags:
460 if flags:
460 raise error.RevlogError(
461 raise error.RevlogError(
461 _(b'unknown flags (%#04x) in version %d revlog %s')
462 _(b'unknown flags (%#04x) in version %d revlog %s')
462 % (flags >> 16, fmt, self.indexfile)
463 % (flags >> 16, fmt, self.indexfile)
463 )
464 )
464
465
465 self._inline = False
466 self._inline = False
466 self._generaldelta = False
467 self._generaldelta = False
467
468
468 elif fmt == REVLOGV1:
469 elif fmt == REVLOGV1:
469 if flags & ~REVLOGV1_FLAGS:
470 if flags & ~REVLOGV1_FLAGS:
470 raise error.RevlogError(
471 raise error.RevlogError(
471 _(b'unknown flags (%#04x) in version %d revlog %s')
472 _(b'unknown flags (%#04x) in version %d revlog %s')
472 % (flags >> 16, fmt, self.indexfile)
473 % (flags >> 16, fmt, self.indexfile)
473 )
474 )
474
475
475 self._inline = versionflags & FLAG_INLINE_DATA
476 self._inline = versionflags & FLAG_INLINE_DATA
476 self._generaldelta = versionflags & FLAG_GENERALDELTA
477 self._generaldelta = versionflags & FLAG_GENERALDELTA
477
478
478 elif fmt == REVLOGV2:
479 elif fmt == REVLOGV2:
479 if flags & ~REVLOGV2_FLAGS:
480 if flags & ~REVLOGV2_FLAGS:
480 raise error.RevlogError(
481 raise error.RevlogError(
481 _(b'unknown flags (%#04x) in version %d revlog %s')
482 _(b'unknown flags (%#04x) in version %d revlog %s')
482 % (flags >> 16, fmt, self.indexfile)
483 % (flags >> 16, fmt, self.indexfile)
483 )
484 )
484
485
485 # There is a bug in the transaction handling when going from an
486 # There is a bug in the transaction handling when going from an
486 # inline revlog to a separate index and data file. Turn it off until
487 # inline revlog to a separate index and data file. Turn it off until
487 # it's fixed, since v2 revlogs sometimes get rewritten on exchange.
488 # it's fixed, since v2 revlogs sometimes get rewritten on exchange.
488 # See issue6485
489 # See issue6485
489 self._inline = False
490 self._inline = False
490 # generaldelta implied by version 2 revlogs.
491 # generaldelta implied by version 2 revlogs.
491 self._generaldelta = True
492 self._generaldelta = True
492
493
493 else:
494 else:
494 raise error.RevlogError(
495 raise error.RevlogError(
495 _(b'unknown version (%d) in revlog %s') % (fmt, self.indexfile)
496 _(b'unknown version (%d) in revlog %s') % (fmt, self.indexfile)
496 )
497 )
497
498
498 self.nodeconstants = sha1nodeconstants
499 self.nodeconstants = sha1nodeconstants
499 self.nullid = self.nodeconstants.nullid
500 self.nullid = self.nodeconstants.nullid
500
501
501 # sparse-revlog can't be on without general-delta (issue6056)
502 # sparse-revlog can't be on without general-delta (issue6056)
502 if not self._generaldelta:
503 if not self._generaldelta:
503 self._sparserevlog = False
504 self._sparserevlog = False
504
505
505 self._storedeltachains = True
506 self._storedeltachains = True
506
507
507 devel_nodemap = (
508 devel_nodemap = (
508 self.nodemap_file
509 self.nodemap_file
509 and opts.get(b'devel-force-nodemap', False)
510 and opts.get(b'devel-force-nodemap', False)
510 and parse_index_v1_nodemap is not None
511 and parse_index_v1_nodemap is not None
511 )
512 )
512
513
513 use_rust_index = False
514 use_rust_index = False
514 if rustrevlog is not None:
515 if rustrevlog is not None:
515 if self.nodemap_file is not None:
516 if self.nodemap_file is not None:
516 use_rust_index = True
517 use_rust_index = True
517 else:
518 else:
518 use_rust_index = self.opener.options.get(b'rust.index')
519 use_rust_index = self.opener.options.get(b'rust.index')
519
520
520 self._parse_index = parse_index_v1
521 self._parse_index = parse_index_v1
521 if self.version == REVLOGV0:
522 if self.version == REVLOGV0:
522 self._parse_index = revlogv0.parse_index_v0
523 self._parse_index = revlogv0.parse_index_v0
523 elif fmt == REVLOGV2:
524 elif fmt == REVLOGV2:
524 self._parse_index = parse_index_v2
525 self._parse_index = parse_index_v2
525 elif devel_nodemap:
526 elif devel_nodemap:
526 self._parse_index = parse_index_v1_nodemap
527 self._parse_index = parse_index_v1_nodemap
527 elif use_rust_index:
528 elif use_rust_index:
528 self._parse_index = parse_index_v1_mixed
529 self._parse_index = parse_index_v1_mixed
529 try:
530 try:
530 d = self._parse_index(indexdata, self._inline)
531 d = self._parse_index(indexdata, self._inline)
531 index, _chunkcache = d
532 index, _chunkcache = d
532 use_nodemap = (
533 use_nodemap = (
533 not self._inline
534 not self._inline
534 and self.nodemap_file is not None
535 and self.nodemap_file is not None
535 and util.safehasattr(index, 'update_nodemap_data')
536 and util.safehasattr(index, 'update_nodemap_data')
536 )
537 )
537 if use_nodemap:
538 if use_nodemap:
538 nodemap_data = nodemaputil.persisted_data(self)
539 nodemap_data = nodemaputil.persisted_data(self)
539 if nodemap_data is not None:
540 if nodemap_data is not None:
540 docket = nodemap_data[0]
541 docket = nodemap_data[0]
541 if (
542 if (
542 len(d[0]) > docket.tip_rev
543 len(d[0]) > docket.tip_rev
543 and d[0][docket.tip_rev][7] == docket.tip_node
544 and d[0][docket.tip_rev][7] == docket.tip_node
544 ):
545 ):
545 # no changelog tampering
546 # no changelog tampering
546 self._nodemap_docket = docket
547 self._nodemap_docket = docket
547 index.update_nodemap_data(*nodemap_data)
548 index.update_nodemap_data(*nodemap_data)
548 except (ValueError, IndexError):
549 except (ValueError, IndexError):
549 raise error.RevlogError(
550 raise error.RevlogError(
550 _(b"index %s is corrupted") % self.indexfile
551 _(b"index %s is corrupted") % self.indexfile
551 )
552 )
552 self.index, self._chunkcache = d
553 self.index, self._chunkcache = d
553 if not self._chunkcache:
554 if not self._chunkcache:
554 self._chunkclear()
555 self._chunkclear()
555 # revnum -> (chain-length, sum-delta-length)
556 # revnum -> (chain-length, sum-delta-length)
556 self._chaininfocache = util.lrucachedict(500)
557 self._chaininfocache = util.lrucachedict(500)
557 # revlog header -> revlog compressor
558 # revlog header -> revlog compressor
558 self._decompressors = {}
559 self._decompressors = {}
559
560
560 @util.propertycache
561 @util.propertycache
561 def revlog_kind(self):
562 def revlog_kind(self):
562 return self.target[0]
563 return self.target[0]
563
564
564 @util.propertycache
565 @util.propertycache
565 def _compressor(self):
566 def _compressor(self):
566 engine = util.compengines[self._compengine]
567 engine = util.compengines[self._compengine]
567 return engine.revlogcompressor(self._compengineopts)
568 return engine.revlogcompressor(self._compengineopts)
568
569
569 def _indexfp(self, mode=b'r'):
570 def _indexfp(self, mode=b'r'):
570 """file object for the revlog's index file"""
571 """file object for the revlog's index file"""
571 args = {'mode': mode}
572 args = {'mode': mode}
572 if mode != b'r':
573 if mode != b'r':
573 args['checkambig'] = self._checkambig
574 args['checkambig'] = self._checkambig
574 if mode == b'w':
575 if mode == b'w':
575 args['atomictemp'] = True
576 args['atomictemp'] = True
576 return self.opener(self.indexfile, **args)
577 return self.opener(self.indexfile, **args)
577
578
578 def _datafp(self, mode=b'r'):
579 def _datafp(self, mode=b'r'):
579 """file object for the revlog's data file"""
580 """file object for the revlog's data file"""
580 return self.opener(self.datafile, mode=mode)
581 return self.opener(self.datafile, mode=mode)
581
582
582 @contextlib.contextmanager
583 @contextlib.contextmanager
583 def _datareadfp(self, existingfp=None):
584 def _datareadfp(self, existingfp=None):
584 """file object suitable to read data"""
585 """file object suitable to read data"""
585 # Use explicit file handle, if given.
586 # Use explicit file handle, if given.
586 if existingfp is not None:
587 if existingfp is not None:
587 yield existingfp
588 yield existingfp
588
589
589 # Use a file handle being actively used for writes, if available.
590 # Use a file handle being actively used for writes, if available.
590 # There is some danger in doing this because reads will seek the
591 # There is some danger in doing this because reads will seek the
591 # file. However, _writeentry() performs a SEEK_END before all writes,
592 # file. However, _writeentry() performs a SEEK_END before all writes,
592 # so we should be safe.
593 # so we should be safe.
593 elif self._writinghandles:
594 elif self._writinghandles:
594 if self._inline:
595 if self._inline:
595 yield self._writinghandles[0]
596 yield self._writinghandles[0]
596 else:
597 else:
597 yield self._writinghandles[1]
598 yield self._writinghandles[1]
598
599
599 # Otherwise open a new file handle.
600 # Otherwise open a new file handle.
600 else:
601 else:
601 if self._inline:
602 if self._inline:
602 func = self._indexfp
603 func = self._indexfp
603 else:
604 else:
604 func = self._datafp
605 func = self._datafp
605 with func() as fp:
606 with func() as fp:
606 yield fp
607 yield fp
607
608
608 def tiprev(self):
609 def tiprev(self):
609 return len(self.index) - 1
610 return len(self.index) - 1
610
611
611 def tip(self):
612 def tip(self):
612 return self.node(self.tiprev())
613 return self.node(self.tiprev())
613
614
614 def __contains__(self, rev):
615 def __contains__(self, rev):
615 return 0 <= rev < len(self)
616 return 0 <= rev < len(self)
616
617
617 def __len__(self):
618 def __len__(self):
618 return len(self.index)
619 return len(self.index)
619
620
620 def __iter__(self):
621 def __iter__(self):
621 return iter(pycompat.xrange(len(self)))
622 return iter(pycompat.xrange(len(self)))
622
623
623 def revs(self, start=0, stop=None):
624 def revs(self, start=0, stop=None):
624 """iterate over all rev in this revlog (from start to stop)"""
625 """iterate over all rev in this revlog (from start to stop)"""
625 return storageutil.iterrevs(len(self), start=start, stop=stop)
626 return storageutil.iterrevs(len(self), start=start, stop=stop)
626
627
627 @property
628 @property
628 def nodemap(self):
629 def nodemap(self):
629 msg = (
630 msg = (
630 b"revlog.nodemap is deprecated, "
631 b"revlog.nodemap is deprecated, "
631 b"use revlog.index.[has_node|rev|get_rev]"
632 b"use revlog.index.[has_node|rev|get_rev]"
632 )
633 )
633 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
634 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
634 return self.index.nodemap
635 return self.index.nodemap
635
636
636 @property
637 @property
637 def _nodecache(self):
638 def _nodecache(self):
638 msg = b"revlog._nodecache is deprecated, use revlog.index.nodemap"
639 msg = b"revlog._nodecache is deprecated, use revlog.index.nodemap"
639 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
640 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
640 return self.index.nodemap
641 return self.index.nodemap
641
642
642 def hasnode(self, node):
643 def hasnode(self, node):
643 try:
644 try:
644 self.rev(node)
645 self.rev(node)
645 return True
646 return True
646 except KeyError:
647 except KeyError:
647 return False
648 return False
648
649
649 def candelta(self, baserev, rev):
650 def candelta(self, baserev, rev):
650 """whether two revisions (baserev, rev) can be delta-ed or not"""
651 """whether two revisions (baserev, rev) can be delta-ed or not"""
651 # Disable delta if either rev requires a content-changing flag
652 # Disable delta if either rev requires a content-changing flag
652 # processor (ex. LFS). This is because such a flag processor can alter
653 # processor (ex. LFS). This is because such a flag processor can alter
653 # the rawtext content that the delta will be based on, and two clients
654 # the rawtext content that the delta will be based on, and two clients
654 # could have the same revlog node with different flags (i.e. different
655 # could have the same revlog node with different flags (i.e. different
655 # rawtext contents) and the delta could be incompatible.
656 # rawtext contents) and the delta could be incompatible.
656 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
657 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
657 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
658 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
658 ):
659 ):
659 return False
660 return False
660 return True
661 return True
661
662
662 def update_caches(self, transaction):
663 def update_caches(self, transaction):
663 if self.nodemap_file is not None:
664 if self.nodemap_file is not None:
664 if transaction is None:
665 if transaction is None:
665 nodemaputil.update_persistent_nodemap(self)
666 nodemaputil.update_persistent_nodemap(self)
666 else:
667 else:
667 nodemaputil.setup_persistent_nodemap(transaction, self)
668 nodemaputil.setup_persistent_nodemap(transaction, self)
668
669
669 def clearcaches(self):
670 def clearcaches(self):
670 self._revisioncache = None
671 self._revisioncache = None
671 self._chainbasecache.clear()
672 self._chainbasecache.clear()
672 self._chunkcache = (0, b'')
673 self._chunkcache = (0, b'')
673 self._pcache = {}
674 self._pcache = {}
674 self._nodemap_docket = None
675 self._nodemap_docket = None
675 self.index.clearcaches()
676 self.index.clearcaches()
676 # The python code is the one responsible for validating the docket; we
677 # The python code is the one responsible for validating the docket; we
677 # end up having to refresh it here.
678 # end up having to refresh it here.
678 use_nodemap = (
679 use_nodemap = (
679 not self._inline
680 not self._inline
680 and self.nodemap_file is not None
681 and self.nodemap_file is not None
681 and util.safehasattr(self.index, 'update_nodemap_data')
682 and util.safehasattr(self.index, 'update_nodemap_data')
682 )
683 )
683 if use_nodemap:
684 if use_nodemap:
684 nodemap_data = nodemaputil.persisted_data(self)
685 nodemap_data = nodemaputil.persisted_data(self)
685 if nodemap_data is not None:
686 if nodemap_data is not None:
686 self._nodemap_docket = nodemap_data[0]
687 self._nodemap_docket = nodemap_data[0]
687 self.index.update_nodemap_data(*nodemap_data)
688 self.index.update_nodemap_data(*nodemap_data)
688
689
689 def rev(self, node):
690 def rev(self, node):
690 try:
691 try:
691 return self.index.rev(node)
692 return self.index.rev(node)
692 except TypeError:
693 except TypeError:
693 raise
694 raise
694 except error.RevlogError:
695 except error.RevlogError:
695 # parsers.c radix tree lookup failed
696 # parsers.c radix tree lookup failed
696 if (
697 if (
697 node == self.nodeconstants.wdirid
698 node == self.nodeconstants.wdirid
698 or node in self.nodeconstants.wdirfilenodeids
699 or node in self.nodeconstants.wdirfilenodeids
699 ):
700 ):
700 raise error.WdirUnsupported
701 raise error.WdirUnsupported
701 raise error.LookupError(node, self.indexfile, _(b'no node'))
702 raise error.LookupError(node, self.indexfile, _(b'no node'))
702
703
703 # Accessors for index entries.
704 # Accessors for index entries.
704
705
705 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
706 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
706 # are flags.
707 # are flags.
707 def start(self, rev):
708 def start(self, rev):
708 return int(self.index[rev][0] >> 16)
709 return int(self.index[rev][0] >> 16)
709
710
710 def flags(self, rev):
711 def flags(self, rev):
711 return self.index[rev][0] & 0xFFFF
712 return self.index[rev][0] & 0xFFFF
712
713
713 def length(self, rev):
714 def length(self, rev):
714 return self.index[rev][1]
715 return self.index[rev][1]
715
716
716 def sidedata_length(self, rev):
717 def sidedata_length(self, rev):
717 if self.version & 0xFFFF != REVLOGV2:
718 if self.version & 0xFFFF != REVLOGV2:
718 return 0
719 return 0
719 return self.index[rev][9]
720 return self.index[rev][9]
720
721
721 def rawsize(self, rev):
722 def rawsize(self, rev):
722 """return the length of the uncompressed text for a given revision"""
723 """return the length of the uncompressed text for a given revision"""
723 l = self.index[rev][2]
724 l = self.index[rev][2]
724 if l >= 0:
725 if l >= 0:
725 return l
726 return l
726
727
727 t = self.rawdata(rev)
728 t = self.rawdata(rev)
728 return len(t)
729 return len(t)
729
730
730 def size(self, rev):
731 def size(self, rev):
731 """length of non-raw text (processed by a "read" flag processor)"""
732 """length of non-raw text (processed by a "read" flag processor)"""
732 # fast path: if no "read" flag processor could change the content,
733 # fast path: if no "read" flag processor could change the content,
733 # size is rawsize. note: ELLIPSIS is known to not change the content.
734 # size is rawsize. note: ELLIPSIS is known to not change the content.
734 flags = self.flags(rev)
735 flags = self.flags(rev)
735 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
736 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
736 return self.rawsize(rev)
737 return self.rawsize(rev)
737
738
738 return len(self.revision(rev, raw=False))
739 return len(self.revision(rev, raw=False))
739
740
740 def chainbase(self, rev):
741 def chainbase(self, rev):
741 base = self._chainbasecache.get(rev)
742 base = self._chainbasecache.get(rev)
742 if base is not None:
743 if base is not None:
743 return base
744 return base
744
745
745 index = self.index
746 index = self.index
746 iterrev = rev
747 iterrev = rev
747 base = index[iterrev][3]
748 base = index[iterrev][3]
748 while base != iterrev:
749 while base != iterrev:
749 iterrev = base
750 iterrev = base
750 base = index[iterrev][3]
751 base = index[iterrev][3]
751
752
752 self._chainbasecache[rev] = base
753 self._chainbasecache[rev] = base
753 return base
754 return base
754
755
755 def linkrev(self, rev):
756 def linkrev(self, rev):
756 return self.index[rev][4]
757 return self.index[rev][4]
757
758
758 def parentrevs(self, rev):
759 def parentrevs(self, rev):
759 try:
760 try:
760 entry = self.index[rev]
761 entry = self.index[rev]
761 except IndexError:
762 except IndexError:
762 if rev == wdirrev:
763 if rev == wdirrev:
763 raise error.WdirUnsupported
764 raise error.WdirUnsupported
764 raise
765 raise
765 if entry[5] == nullrev:
766 if entry[5] == nullrev:
766 return entry[6], entry[5]
767 return entry[6], entry[5]
767 else:
768 else:
768 return entry[5], entry[6]
769 return entry[5], entry[6]
769
770
770 # fast parentrevs(rev) where rev isn't filtered
771 # fast parentrevs(rev) where rev isn't filtered
771 _uncheckedparentrevs = parentrevs
772 _uncheckedparentrevs = parentrevs
772
773
773 def node(self, rev):
774 def node(self, rev):
774 try:
775 try:
775 return self.index[rev][7]
776 return self.index[rev][7]
776 except IndexError:
777 except IndexError:
777 if rev == wdirrev:
778 if rev == wdirrev:
778 raise error.WdirUnsupported
779 raise error.WdirUnsupported
779 raise
780 raise
780
781
781 # Derived from index values.
782 # Derived from index values.
782
783
783 def end(self, rev):
784 def end(self, rev):
784 return self.start(rev) + self.length(rev)
785 return self.start(rev) + self.length(rev)
785
786
786 def parents(self, node):
787 def parents(self, node):
787 i = self.index
788 i = self.index
788 d = i[self.rev(node)]
789 d = i[self.rev(node)]
789 # inline node() to avoid function call overhead
790 # inline node() to avoid function call overhead
790 if d[5] == self.nullid:
791 if d[5] == self.nullid:
791 return i[d[6]][7], i[d[5]][7]
792 return i[d[6]][7], i[d[5]][7]
792 else:
793 else:
793 return i[d[5]][7], i[d[6]][7]
794 return i[d[5]][7], i[d[6]][7]
794
795
795 def chainlen(self, rev):
796 def chainlen(self, rev):
796 return self._chaininfo(rev)[0]
797 return self._chaininfo(rev)[0]
797
798
798 def _chaininfo(self, rev):
799 def _chaininfo(self, rev):
799 chaininfocache = self._chaininfocache
800 chaininfocache = self._chaininfocache
800 if rev in chaininfocache:
801 if rev in chaininfocache:
801 return chaininfocache[rev]
802 return chaininfocache[rev]
802 index = self.index
803 index = self.index
803 generaldelta = self._generaldelta
804 generaldelta = self._generaldelta
804 iterrev = rev
805 iterrev = rev
805 e = index[iterrev]
806 e = index[iterrev]
806 clen = 0
807 clen = 0
807 compresseddeltalen = 0
808 compresseddeltalen = 0
808 while iterrev != e[3]:
809 while iterrev != e[3]:
809 clen += 1
810 clen += 1
810 compresseddeltalen += e[1]
811 compresseddeltalen += e[1]
811 if generaldelta:
812 if generaldelta:
812 iterrev = e[3]
813 iterrev = e[3]
813 else:
814 else:
814 iterrev -= 1
815 iterrev -= 1
815 if iterrev in chaininfocache:
816 if iterrev in chaininfocache:
816 t = chaininfocache[iterrev]
817 t = chaininfocache[iterrev]
817 clen += t[0]
818 clen += t[0]
818 compresseddeltalen += t[1]
819 compresseddeltalen += t[1]
819 break
820 break
820 e = index[iterrev]
821 e = index[iterrev]
821 else:
822 else:
822 # Add text length of base since decompressing that also takes
823 # Add text length of base since decompressing that also takes
823 # work. For cache hits the length is already included.
824 # work. For cache hits the length is already included.
824 compresseddeltalen += e[1]
825 compresseddeltalen += e[1]
825 r = (clen, compresseddeltalen)
826 r = (clen, compresseddeltalen)
826 chaininfocache[rev] = r
827 chaininfocache[rev] = r
827 return r
828 return r
828
829
829 def _deltachain(self, rev, stoprev=None):
830 def _deltachain(self, rev, stoprev=None):
830 """Obtain the delta chain for a revision.
831 """Obtain the delta chain for a revision.
831
832
832 ``stoprev`` specifies a revision to stop at. If not specified, we
833 ``stoprev`` specifies a revision to stop at. If not specified, we
833 stop at the base of the chain.
834 stop at the base of the chain.
834
835
835 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
836 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
836 revs in ascending order and ``stopped`` is a bool indicating whether
837 revs in ascending order and ``stopped`` is a bool indicating whether
837 ``stoprev`` was hit.
838 ``stoprev`` was hit.
838 """
839 """
839 # Try C implementation.
840 # Try C implementation.
840 try:
841 try:
841 return self.index.deltachain(rev, stoprev, self._generaldelta)
842 return self.index.deltachain(rev, stoprev, self._generaldelta)
842 except AttributeError:
843 except AttributeError:
843 pass
844 pass
844
845
845 chain = []
846 chain = []
846
847
847 # Alias to prevent attribute lookup in tight loop.
848 # Alias to prevent attribute lookup in tight loop.
848 index = self.index
849 index = self.index
849 generaldelta = self._generaldelta
850 generaldelta = self._generaldelta
850
851
851 iterrev = rev
852 iterrev = rev
852 e = index[iterrev]
853 e = index[iterrev]
853 while iterrev != e[3] and iterrev != stoprev:
854 while iterrev != e[3] and iterrev != stoprev:
854 chain.append(iterrev)
855 chain.append(iterrev)
855 if generaldelta:
856 if generaldelta:
856 iterrev = e[3]
857 iterrev = e[3]
857 else:
858 else:
858 iterrev -= 1
859 iterrev -= 1
859 e = index[iterrev]
860 e = index[iterrev]
860
861
861 if iterrev == stoprev:
862 if iterrev == stoprev:
862 stopped = True
863 stopped = True
863 else:
864 else:
864 chain.append(iterrev)
865 chain.append(iterrev)
865 stopped = False
866 stopped = False
866
867
867 chain.reverse()
868 chain.reverse()
868 return chain, stopped
869 return chain, stopped
869
870
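In miniature (toy data, not the real index layout): slot 3 of each index entry holds the delta-base rev, and a revision whose base is itself is a full snapshot that ends the chain, which is the walk the generaldelta branch above performs.

toy_base = {0: 0, 1: 0, 2: 1, 3: 3, 4: 3}  # rev -> delta-base rev

def toy_deltachain(rev):
    chain = []
    while toy_base[rev] != rev:  # keep walking until a full snapshot
        chain.append(rev)
        rev = toy_base[rev]
    chain.append(rev)
    chain.reverse()  # oldest (base) first, as _deltachain returns it
    return chain

assert toy_deltachain(2) == [0, 1, 2]
assert toy_deltachain(4) == [3, 4]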
    def ancestors(self, revs, stoprev=0, inclusive=False):
        """Generate the ancestors of 'revs' in reverse revision order.
        Does not generate revs lower than stoprev.

        See the documentation for ancestor.lazyancestors for more details."""

        # first, make sure start revisions aren't filtered
        revs = list(revs)
        checkrev = self.node
        for r in revs:
            checkrev(r)
        # and we're sure ancestors aren't filtered as well

        if rustancestor is not None:
            lazyancestors = rustancestor.LazyAncestors
            arg = self.index
        else:
            lazyancestors = ancestor.lazyancestors
            arg = self._uncheckedparentrevs
        return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)

    def descendants(self, revs):
        return dagop.descendantrevs(revs, self.revs, self.parentrevs)

    def findcommonmissing(self, common=None, heads=None):
        """Return a tuple of the ancestors of common and the ancestors of heads
        that are not ancestors of common. In revset terminology, we return the
        tuple:

          ::common, (::heads) - (::common)

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [self.nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        # we want the ancestors, but inclusive
        class lazyset(object):
            def __init__(self, lazyvalues):
                self.addedvalues = set()
                self.lazyvalues = lazyvalues

            def __contains__(self, value):
                return value in self.addedvalues or value in self.lazyvalues

            def __iter__(self):
                added = self.addedvalues
                for r in added:
                    yield r
                for r in self.lazyvalues:
                    if r not in added:
                        yield r

            def add(self, value):
                self.addedvalues.add(value)

            def update(self, values):
                self.addedvalues.update(values)

        has = lazyset(self.ancestors(common))
        has.add(nullrev)
        has.update(common)

        # take all ancestors from heads that aren't in has
        missing = set()
        visit = collections.deque(r for r in heads if r not in has)
        while visit:
            r = visit.popleft()
            if r in missing:
                continue
            else:
                missing.add(r)
                for p in self.parentrevs(r):
                    if p not in has:
                        visit.append(p)
        missing = list(missing)
        missing.sort()
        return has, [self.node(miss) for miss in missing]

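    # A minimal sketch (illustrative only, not part of this module) of the
    # BFS above: collect every ancestor of the heads that is not already in
    # ``has``, given a hypothetical parents table (-1 stands for nullrev):
    #
    #     import collections
    #     parents = {3: (1, -1), 2: (1, -1), 1: (0, -1), 0: (-1, -1)}
    #     has = {0, 1, -1}
    #     missing = set()
    #     visit = collections.deque(r for r in (2, 3) if r not in has)
    #     while visit:
    #         r = visit.popleft()
    #         if r in missing:
    #             continue
    #         missing.add(r)
    #         visit.extend(p for p in parents[r] if p not in has)
    #     sorted(missing)   # -> [2, 3]
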
    def incrementalmissingrevs(self, common=None):
        """Return an object that can be used to incrementally compute the
        revision numbers of the ancestors of arbitrary sets that are not
        ancestors of common. This is an ancestor.incrementalmissingancestors
        object.

        'common' is a list of revision numbers. If common is not supplied, uses
        nullrev.
        """
        if common is None:
            common = [nullrev]

        if rustancestor is not None:
            return rustancestor.MissingAncestors(self.index, common)
        return ancestor.incrementalmissingancestors(self.parentrevs, common)

    def findmissingrevs(self, common=None, heads=None):
        """Return the revision numbers of the ancestors of heads that
        are not ancestors of common.

        More specifically, return a list of revision numbers corresponding to
        nodes N such that every N satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of revision numbers. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullrev."""
        if common is None:
            common = [nullrev]
        if heads is None:
            heads = self.headrevs()

        inc = self.incrementalmissingrevs(common=common)
        return inc.missingancestors(heads)

    def findmissing(self, common=None, heads=None):
        """Return the ancestors of heads that are not ancestors of common.

        More specifically, return a list of nodes N such that every N
        satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [self.nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        inc = self.incrementalmissingrevs(common=common)
        return [self.node(r) for r in inc.missingancestors(heads)]

    def nodesbetween(self, roots=None, heads=None):
        """Return a topological path from 'roots' to 'heads'.

        Return a tuple (nodes, outroots, outheads) where 'nodes' is a
        topologically sorted list of all nodes N that satisfy both of
        these constraints:

          1. N is a descendant of some node in 'roots'
          2. N is an ancestor of some node in 'heads'

        Every node is considered to be both a descendant and an ancestor
        of itself, so every reachable node in 'roots' and 'heads' will be
        included in 'nodes'.

        'outroots' is the list of reachable nodes in 'roots', i.e., the
        subset of 'roots' that is returned in 'nodes'. Likewise,
        'outheads' is the subset of 'heads' that is also in 'nodes'.

        'roots' and 'heads' are both lists of node IDs. If 'roots' is
        unspecified, uses nullid as the only root. If 'heads' is
        unspecified, uses list of all of the revlog's heads."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [self.nullid]  # Everybody's a descendant of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return (
                [self.node(r) for r in self],
                [self.nullid],
                list(self.heads()),
            )
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = set()
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, False)
            # Start at the top and keep marking parents until we're done.
            nodestotag = set(heads)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == self.nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendant of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors.add(n)  # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.update(
                            [p for p in self.parents(n) if p != self.nullid]
                        )
                    elif n in heads:  # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [root for root in roots if root in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(root) for root in roots])
                else:
                    # No more roots? Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [self.nullid]
        # Transform our roots list into a set.
        descendants = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendants.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendants, empty parents will look like
        # they're descendants.
        for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
            n = self.node(r)
            isdescendant = False
            if lowestrev == nullrev:  # Everybody is a descendant of nullid
                isdescendant = True
            elif n in descendants:
                # n is already a descendant
                isdescendant = True
                # This check only needs to be done here because all the roots
                # will start being marked as descendants before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendants, it's not a root.
                    if (p[0] in descendants) or (p[1] in descendants):
                        roots.remove(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendant if either of its parents are
                # descendants. (We seeded the dependents list with the roots
                # up there, remember?)
                if (p[0] in descendants) or (p[1] in descendants):
                    descendants.add(n)
                    isdescendant = True
            if isdescendant and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendants and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = True
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = True
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        heads = [head for head, flag in pycompat.iteritems(heads) if flag]
        roots = list(roots)
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)

    def headrevs(self, revs=None):
        if revs is None:
            try:
                return self.index.headrevs()
            except AttributeError:
                return self._headrevs()
        if rustdagop is not None:
            return rustdagop.headrevs(self.index, revs)
        return dagop.headrevs(revs, self._uncheckedparentrevs)

    def computephases(self, roots):
        return self.index.computephasesmapsets(roots)

    def _headrevs(self):
        count = len(self)
        if not count:
            return [nullrev]
        # we won't iter over filtered rev so nobody is a head at start
        ishead = [0] * (count + 1)
        index = self.index
        for r in self:
            ishead[r] = 1  # I may be a head
            e = index[r]
            ishead[e[5]] = ishead[e[6]] = 0  # my parents are not
        return [r for r, val in enumerate(ishead) if val]

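    # A minimal sketch (illustrative only, not part of this module) of the
    # marking scheme above, with a hypothetical parents table: every rev
    # starts as a candidate head, then anything that is a parent gets
    # unmarked. The extra slot at the end absorbs writes for nullrev (-1).
    #
    #     parents = {0: (-1, -1), 1: (0, -1), 2: (0, -1), 3: (1, 2)}
    #     ishead = [0] * (len(parents) + 1)
    #     for r in range(len(parents)):
    #         ishead[r] = 1                       # r may be a head
    #         p1, p2 = parents[r]
    #         ishead[p1] = ishead[p2] = 0         # its parents are not
    #     [r for r, v in enumerate(ishead) if v]  # -> [3]
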
    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned
        if stop is specified, it will consider all the revs from stop
        as if they had no children
        """
        if start is None and stop is None:
            if not len(self):
                return [self.nullid]
            return [self.node(r) for r in self.headrevs()]

        if start is None:
            start = nullrev
        else:
            start = self.rev(start)

        stoprevs = {self.rev(n) for n in stop or []}

        revs = dagop.headrevssubset(
            self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
        )

        return [self.node(rev) for rev in revs]

    def children(self, node):
        """find the children of a given node"""
        c = []
        p = self.rev(node)
        for r in self.revs(start=p + 1):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                c.append(self.node(r))
        return c

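    # A minimal sketch (illustrative only, not part of this module) of the
    # scan above, over a hypothetical parents table: the children of rev 0
    # are the later revs that list 0 as a parent.
    #
    #     parents = {0: (-1, -1), 1: (0, -1), 2: (0, 1), 3: (2, -1)}
    #     p = 0
    #     [r for r in range(p + 1, len(parents)) if p in parents[r]]  # -> [1, 2]
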
    def commonancestorsheads(self, a, b):
        """calculate all the heads of the common ancestors of nodes a and b"""
        a, b = self.rev(a), self.rev(b)
        ancs = self._commonancestorsheads(a, b)
        return pycompat.maplist(self.node, ancs)

    def _commonancestorsheads(self, *revs):
        """calculate all the heads of the common ancestors of revs"""
        try:
            ancs = self.index.commonancestorsheads(*revs)
        except (AttributeError, OverflowError):  # C implementation failed
            ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
        return ancs

    def isancestor(self, a, b):
        """return True if node a is an ancestor of node b

        A revision is considered an ancestor of itself."""
        a, b = self.rev(a), self.rev(b)
        return self.isancestorrev(a, b)

    def isancestorrev(self, a, b):
        """return True if revision a is an ancestor of revision b

        A revision is considered an ancestor of itself.

        The implementation of this is trivial but the use of
        reachableroots is not."""
        if a == nullrev:
            return True
        elif a == b:
            return True
        elif a > b:
            return False
        return bool(self.reachableroots(a, [b], [a], includepath=False))

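    # The early exits above lean on revision numbers being topologically
    # ordered: an ancestor always has a smaller rev than its descendants, so
    # ``a > b`` can never hold for an ancestor. A minimal sketch (illustrative
    # only) with a hypothetical parents table and a plain DFS standing in for
    # reachableroots:
    #
    #     parents = {0: (-1, -1), 1: (0, -1), 2: (1, -1)}
    #     def toy_isancestorrev(a, b):
    #         if a == -1 or a == b:
    #             return True
    #         if a > b:
    #             return False
    #         seen, stack = {b}, [b]
    #         while stack:
    #             for p in parents[stack.pop()]:
    #                 if p == a:
    #                     return True
    #                 if p >= 0 and p not in seen:
    #                     seen.add(p)
    #                     stack.append(p)
    #         return False
    #
    #     toy_isancestorrev(0, 2)   # -> True
    #     toy_isancestorrev(2, 0)   # -> False
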
    def reachableroots(self, minroot, heads, roots, includepath=False):
        """return (heads(::(<roots> and <roots>::<heads>)))

        If includepath is True, return (<roots>::<heads>)."""
        try:
            return self.index.reachableroots2(
                minroot, heads, roots, includepath
            )
        except AttributeError:
            return dagop._reachablerootspure(
                self.parentrevs, minroot, roots, heads, includepath
            )

    def ancestor(self, a, b):
        """calculate the "best" common ancestor of nodes a and b"""

        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.ancestors(a, b)
        except (AttributeError, OverflowError):
            ancs = ancestor.ancestors(self.parentrevs, a, b)
        if ancs:
            # choose a consistent winner when there's a tie
            return min(map(self.node, ancs))
        return self.nullid

    def _match(self, id):
        if isinstance(id, int):
            # rev
            return self.node(id)
        if len(id) == self.nodeconstants.nodelen:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node)  # quick search the index
                return node
            except error.LookupError:
                pass  # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if b"%d" % rev != id:
                raise ValueError
            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 2 * self.nodeconstants.nodelen:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (TypeError, error.LookupError):
                pass

    def _partialmatch(self, id):
        # we don't care about wdirfilenodeids as they should always be full
        # hashes
        maybewdir = self.nodeconstants.wdirhex.startswith(id)
        try:
            partial = self.index.partialmatch(id)
            if partial and self.hasnode(partial):
                if maybewdir:
                    # single 'ff...' match in radix tree, ambiguous with wdir
                    raise error.RevlogError
                return partial
            if maybewdir:
                # no 'ff...' match in radix tree, wdir identified
                raise error.WdirUnsupported
            return None
        except error.RevlogError:
            # parsers.c radix tree lookup gave multiple matches
            # fast path: for unfiltered changelog, radix tree is accurate
            if not getattr(self, 'filteredrevs', None):
                raise error.AmbiguousPrefixLookupError(
                    id, self.indexfile, _(b'ambiguous identifier')
                )
            # fall through to slow path that filters hidden revisions
        except (AttributeError, ValueError):
            # we are pure python, or key was too short to search radix tree
            pass

        if id in self._pcache:
            return self._pcache[id]

        if len(id) <= 40:
            try:
                # hex(node)[:...]
                l = len(id) // 2  # grab an even number of digits
                prefix = bin(id[: l * 2])
                nl = [e[7] for e in self.index if e[7].startswith(prefix)]
                nl = [
                    n for n in nl if hex(n).startswith(id) and self.hasnode(n)
                ]
                if self.nodeconstants.nullhex.startswith(id):
                    nl.append(self.nullid)
                if len(nl) > 0:
                    if len(nl) == 1 and not maybewdir:
                        self._pcache[id] = nl[0]
                        return nl[0]
                    raise error.AmbiguousPrefixLookupError(
                        id, self.indexfile, _(b'ambiguous identifier')
                    )
                if maybewdir:
                    raise error.WdirUnsupported
                return None
            except TypeError:
                pass

    def lookup(self, id):
        """locate a node based on:
        - revision number or str(revision number)
        - nodeid or subset of hex nodeid
        """
        n = self._match(id)
        if n is not None:
            return n
        n = self._partialmatch(id)
        if n:
            return n

        raise error.LookupError(id, self.indexfile, _(b'no match found'))

    def shortest(self, node, minlength=1):
        """Find the shortest unambiguous prefix that matches node."""

        def isvalid(prefix):
            try:
                matchednode = self._partialmatch(prefix)
            except error.AmbiguousPrefixLookupError:
                return False
            except error.WdirUnsupported:
                # single 'ff...' match
                return True
            if matchednode is None:
                raise error.LookupError(node, self.indexfile, _(b'no node'))
            return True

        def maybewdir(prefix):
            return all(c == b'f' for c in pycompat.iterbytestr(prefix))

        hexnode = hex(node)

        def disambiguate(hexnode, minlength):
            """Disambiguate against wdirid."""
            for length in range(minlength, len(hexnode) + 1):
                prefix = hexnode[:length]
                if not maybewdir(prefix):
                    return prefix

        if not getattr(self, 'filteredrevs', None):
            try:
                length = max(self.index.shortest(node), minlength)
                return disambiguate(hexnode, length)
            except error.RevlogError:
                if node != self.nodeconstants.wdirid:
                    raise error.LookupError(node, self.indexfile, _(b'no node'))
            except AttributeError:
                # Fall through to pure code
                pass

        if node == self.nodeconstants.wdirid:
            for length in range(minlength, len(hexnode) + 1):
                prefix = hexnode[:length]
                if isvalid(prefix):
                    return prefix

        for length in range(minlength, len(hexnode) + 1):
            prefix = hexnode[:length]
            if isvalid(prefix):
                return disambiguate(hexnode, length)

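    # A minimal sketch (illustrative only, not part of this module) of
    # shortest-prefix disambiguation over a hypothetical list of full hex
    # hashes, growing the prefix until it matches exactly one hash:
    #
    #     hashes = ['a1b2c3', 'a1f4e5', 'b7c8d9']
    #     def toy_shortest(target, hashes, minlength=1):
    #         for length in range(minlength, len(target) + 1):
    #             prefix = target[:length]
    #             matches = [h for h in hashes if h.startswith(prefix)]
    #             if matches == [target]:
    #                 return prefix
    #
    #     toy_shortest('a1b2c3', hashes)   # -> 'a1b'
    #     toy_shortest('b7c8d9', hashes)   # -> 'b'
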
    def cmp(self, node, text):
        """compare text with a given file revision

        returns True if text is different from what is stored.
        """
        p1, p2 = self.parents(node)
        return storageutil.hashrevisionsha1(text, p1, p2) != node

    def _cachesegment(self, offset, data):
        """Add a segment to the revlog cache.

        Accepts an absolute offset and the data that is at that location.
        """
        o, d = self._chunkcache
        # try to add to existing cache
        if o + len(d) == offset and len(d) + len(data) < _chunksize:
            self._chunkcache = o, d + data
        else:
            self._chunkcache = offset, data

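    # A minimal sketch (illustrative only, not part of this module) of the
    # append-if-contiguous rule above, with hypothetical sizes: a new segment
    # extends the cached one only when it starts exactly where the cache ends
    # and the combined size stays under the limit.
    #
    #     LIMIT = 1 << 20                      # stand-in for _chunksize
    #     cache = (0, b'abc')                  # (offset, data)
    #     o, d = cache
    #     offset, data = 3, b'def'
    #     if o + len(d) == offset and len(d) + len(data) < LIMIT:
    #         cache = (o, d + data)            # -> (0, b'abcdef')
    #     else:
    #         cache = (offset, data)
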
    def _readsegment(self, offset, length, df=None):
        """Load a segment of raw data from the revlog.

        Accepts an absolute offset, length to read, and an optional existing
        file handle to read from.

        If an existing file handle is passed, it will be seeked and the
        original seek position will NOT be restored.

        Returns a str or buffer of raw byte data.

        Raises if the requested number of bytes could not be read.
        """
        # Cache data both forward and backward around the requested
        # data, in a fixed size window. This helps speed up operations
        # involving reading the revlog backwards.
        cachesize = self._chunkcachesize
        realoffset = offset & ~(cachesize - 1)
        reallength = (
            (offset + length + cachesize) & ~(cachesize - 1)
        ) - realoffset
        with self._datareadfp(df) as df:
            df.seek(realoffset)
            d = df.read(reallength)

        self._cachesegment(realoffset, d)
        if offset != realoffset or reallength != length:
            startoffset = offset - realoffset
            if len(d) - startoffset < length:
                raise error.RevlogError(
                    _(
                        b'partial read of revlog %s; expected %d bytes from '
                        b'offset %d, got %d'
                    )
                    % (
                        self.indexfile if self._inline else self.datafile,
                        length,
                        realoffset,
                        len(d) - startoffset,
                    )
                )

            return util.buffer(d, startoffset, length)

        if len(d) < length:
            raise error.RevlogError(
                _(
                    b'partial read of revlog %s; expected %d bytes from offset '
                    b'%d, got %d'
                )
                % (
                    self.indexfile if self._inline else self.datafile,
                    length,
                    offset,
                    len(d),
                )
            )

        return d

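    # A minimal sketch (illustrative only, not part of this module) of the
    # window alignment above, assuming a power-of-two cache size: the mask
    # rounds the start down and the end up to cache-size boundaries.
    #
    #     cachesize = 65536
    #     offset, length = 70000, 1000
    #     realoffset = offset & ~(cachesize - 1)                      # 65536
    #     realend = (offset + length + cachesize) & ~(cachesize - 1)  # 131072
    #     reallength = realend - realoffset                           # 65536
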
    def _getsegment(self, offset, length, df=None):
        """Obtain a segment of raw data from the revlog.

        Accepts an absolute offset, length of bytes to obtain, and an
        optional file handle to the already-opened revlog. If the file
        handle is used, its original seek position will not be preserved.

        Requests for data may be returned from a cache.

        Returns a str or a buffer instance of raw byte data.
        """
        o, d = self._chunkcache
        l = len(d)

        # is it in the cache?
        cachestart = offset - o
        cacheend = cachestart + length
        if cachestart >= 0 and cacheend <= l:
            if cachestart == 0 and cacheend == l:
                return d  # avoid a copy
            return util.buffer(d, cachestart, cacheend - cachestart)

        return self._readsegment(offset, length, df=df)

    def _getsegmentforrevs(self, startrev, endrev, df=None):
        """Obtain a segment of raw data corresponding to a range of revisions.

        Accepts the start and end revisions and an optional already-open
        file handle to be used for reading. If the file handle is read, its
        seek position will not be preserved.

        Requests for data may be satisfied by a cache.

        Returns a 2-tuple of (offset, data) for the requested range of
        revisions. Offset is the integer offset from the beginning of the
        revlog and data is a str or buffer of the raw byte data.

        Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
        to determine where each revision's data begins and ends.
        """
        # Inlined self.start(startrev) & self.end(endrev) for perf reasons
        # (functions are expensive).
        index = self.index
        istart = index[startrev]
        start = int(istart[0] >> 16)
        if startrev == endrev:
            end = start + istart[1]
        else:
            iend = index[endrev]
            end = int(iend[0] >> 16) + iend[1]

        if self._inline:
            start += (startrev + 1) * self.index.entry_size
            end += (endrev + 1) * self.index.entry_size
        length = end - start

        return start, self._getsegment(start, length, df=df)

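    # A minimal sketch (illustrative only, not part of this module) of the
    # inline adjustment above: in an inline revlog each revision's data is
    # preceded by its index entry, so raw offsets shift by one entry per
    # revision. Hypothetical numbers:
    #
    #     entry_size = 64
    #     startrev, endrev = 2, 4
    #     start, end = 100, 300                   # data offsets from the index
    #     start += (startrev + 1) * entry_size    # 100 + 192 = 292
    #     end += (endrev + 1) * entry_size        # 300 + 320 = 620
    #     length = end - start                    # 328
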
    def _chunk(self, rev, df=None):
        """Obtain a single decompressed chunk for a revision.

        Accepts an integer revision and an optional already-open file handle
        to be used for reading. If used, the seek position of the file will not
        be preserved.

        Returns a str holding uncompressed data for the requested revision.
        """
        return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])

    def _chunks(self, revs, df=None, targetsize=None):
        """Obtain decompressed chunks for the specified revisions.

        Accepts an iterable of numeric revisions that are assumed to be in
        ascending order. Also accepts an optional already-open file handle
        to be used for reading. If used, the seek position of the file will
        not be preserved.

        This function is similar to calling ``self._chunk()`` multiple times,
        but is faster.

        Returns a list with decompressed data for each requested revision.
        """
        if not revs:
            return []
        start = self.start
        length = self.length
        inline = self._inline
        iosize = self.index.entry_size
        buffer = util.buffer

        l = []
        ladd = l.append

        if not self._withsparseread:
            slicedchunks = (revs,)
        else:
            slicedchunks = deltautil.slicechunk(
                self, revs, targetsize=targetsize
            )

        for revschunk in slicedchunks:
            firstrev = revschunk[0]
            # Skip trailing revisions with empty diff
            for lastrev in revschunk[::-1]:
                if length(lastrev) != 0:
                    break

            try:
                offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
            except OverflowError:
                # issue4215 - we can't cache a run of chunks greater than
                # 2G on Windows
                return [self._chunk(rev, df=df) for rev in revschunk]

            decomp = self.decompress
            for rev in revschunk:
                chunkstart = start(rev)
                if inline:
                    chunkstart += (rev + 1) * iosize
                chunklength = length(rev)
                ladd(decomp(buffer(data, chunkstart - offset, chunklength)))

        return l

    def _chunkclear(self):
        """Clear the raw chunk cache."""
        self._chunkcache = (0, b'')

    def deltaparent(self, rev):
        """return deltaparent of the given revision"""
        base = self.index[rev][3]
        if base == rev:
            return nullrev
        elif self._generaldelta:
            return base
        else:
            return rev - 1

    def issnapshot(self, rev):
        """tells whether rev is a snapshot"""
        if not self._sparserevlog:
            return self.deltaparent(rev) == nullrev
        elif util.safehasattr(self.index, b'issnapshot'):
            # directly assign the method to cache the testing and access
            self.issnapshot = self.index.issnapshot
            return self.issnapshot(rev)
        if rev == nullrev:
            return True
        entry = self.index[rev]
        base = entry[3]
        if base == rev:
            return True
        if base == nullrev:
            return True
        p1 = entry[5]
        p2 = entry[6]
        if base == p1 or base == p2:
            return False
        return self.issnapshot(base)

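    # A minimal sketch (illustrative only, not part of this module) of the
    # recursion above, over a hypothetical table of (base, p1, p2) per rev: a
    # revision is a snapshot if its delta base is itself or null, a plain
    # delta if it deltas against a parent, and an intermediate snapshot if it
    # deltas against a non-parent that is itself a snapshot.
    #
    #     entries = {
    #         0: (0, -1, -1),  # full snapshot: delta base is itself
    #         1: (0, 0, -1),   # delta against its parent 0: not a snapshot
    #         2: (0, 1, -1),   # delta against non-parent 0: intermediate
    #     }
    #     def toy_issnapshot(rev):
    #         if rev == -1:
    #             return True
    #         base, p1, p2 = entries[rev]
    #         if base == rev or base == -1:
    #             return True
    #         if base == p1 or base == p2:
    #             return False
    #         return toy_issnapshot(base)
    #
    #     toy_issnapshot(1)   # -> False
    #     toy_issnapshot(2)   # -> True
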
    def snapshotdepth(self, rev):
        """number of snapshots in the chain before this one"""
        if not self.issnapshot(rev):
            raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
        return len(self._deltachain(rev)[0]) - 1

    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions

        The delta calculated is in binary form and is intended to be written to
        revlog data directly. So this function needs raw revision data.
        """
        if rev1 != nullrev and self.deltaparent(rev2) == rev1:
            return bytes(self._chunk(rev2))

        return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))

    def _processflags(self, text, flags, operation, raw=False):
        """deprecated entry point to access flag processors"""
        msg = b'_processflag(...) use the specialized variant'
        util.nouideprecwarn(msg, b'5.2', stacklevel=2)
        if raw:
            return text, flagutil.processflagsraw(self, text, flags)
        elif operation == b'read':
            return flagutil.processflagsread(self, text, flags)
        else:  # write operation
            return flagutil.processflagswrite(self, text, flags)

    def revision(self, nodeorrev, _df=None, raw=False):
        """return an uncompressed revision of a given node or revision
        number.

        _df - an existing file handle to read from. (internal-only)
        raw - an optional argument specifying if the revision data is to be
        treated as raw data when applying flag transforms. 'raw' should be set
        to True when generating changegroups or in debug commands.
        """
        if raw:
            msg = (
                b'revlog.revision(..., raw=True) is deprecated, '
                b'use revlog.rawdata(...)'
            )
            util.nouideprecwarn(msg, b'5.2', stacklevel=2)
        return self._revisiondata(nodeorrev, _df, raw=raw)[0]

1747 def sidedata(self, nodeorrev, _df=None):
1748 def sidedata(self, nodeorrev, _df=None):
1748 """a map of extra data related to the changeset but not part of the hash
1749 """a map of extra data related to the changeset but not part of the hash
1749
1750
1750 This function currently return a dictionary. However, more advanced
1751 This function currently return a dictionary. However, more advanced
1751 mapping object will likely be used in the future for a more
1752 mapping object will likely be used in the future for a more
1752 efficient/lazy code.
1753 efficient/lazy code.
1753 """
1754 """
1754 return self._revisiondata(nodeorrev, _df)[1]
1755 return self._revisiondata(nodeorrev, _df)[1]
1755
1756
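    # Usage sketch (illustrative; assumes `rlog` is an open revlog and `node`
    # a known node): revision() returns the fully processed text, rawdata()
    # (further below) returns the bytes as stored, and sidedata() exposes the
    # extra metadata that is not part of the node hash:
    #
    #   text = rlog.revision(node)      # flag processors applied
    #   extra = rlog.sidedata(node)     # {} on revlogs without sidedata
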
    def _revisiondata(self, nodeorrev, _df=None, raw=False):
        # deal with <nodeorrev> argument type
        if isinstance(nodeorrev, int):
            rev = nodeorrev
            node = self.node(rev)
        else:
            node = nodeorrev
            rev = None

        # fast path the special `nullid` rev
        if node == self.nullid:
            return b"", {}

        # ``rawtext`` is the text as stored inside the revlog. Might be the
        # revision or might need to be processed to retrieve the revision.
        rev, rawtext, validated = self._rawtext(node, rev, _df=_df)

        if self.version & 0xFFFF == REVLOGV2:
            if rev is None:
                rev = self.rev(node)
            sidedata = self._sidedata(rev)
        else:
            sidedata = {}

        if raw and validated:
            # if we don't want to process the raw text and that raw
            # text is cached, we can exit early.
            return rawtext, sidedata
        if rev is None:
            rev = self.rev(node)
        # the revlog's flags for this revision
        # (they usually alter its state or content)
        flags = self.flags(rev)

        if validated and flags == REVIDX_DEFAULT_FLAGS:
            # no extra flags set, no flag processor runs, text = rawtext
            return rawtext, sidedata

        if raw:
            validatehash = flagutil.processflagsraw(self, rawtext, flags)
            text = rawtext
        else:
            r = flagutil.processflagsread(self, rawtext, flags)
            text, validatehash = r
        if validatehash:
            self.checkhash(text, node, rev=rev)
        if not validated:
            self._revisioncache = (node, rev, rawtext)

        return text, sidedata

    def _rawtext(self, node, rev, _df=None):
        """return the possibly unvalidated rawtext for a revision

        returns (rev, rawtext, validated)
        """

        # revision in the cache (could be useful to apply delta)
        cachedrev = None
        # An intermediate text to apply deltas to
        basetext = None

        # Check if we have the entry in cache
        # The cache entry looks like (node, rev, rawtext)
        if self._revisioncache:
            if self._revisioncache[0] == node:
                return (rev, self._revisioncache[2], True)
            cachedrev = self._revisioncache[1]

        if rev is None:
            rev = self.rev(node)

        chain, stopped = self._deltachain(rev, stoprev=cachedrev)
        if stopped:
            basetext = self._revisioncache[2]

        # drop cache to save memory, the caller is expected to
        # update self._revisioncache after validating the text
        self._revisioncache = None

        targetsize = None
        rawsize = self.index[rev][2]
        if 0 <= rawsize:
            targetsize = 4 * rawsize

        bins = self._chunks(chain, df=_df, targetsize=targetsize)
        if basetext is None:
            basetext = bytes(bins[0])
            bins = bins[1:]

        rawtext = mdiff.patches(basetext, bins)
        del basetext  # let us have a chance to free memory early
        return (rev, rawtext, False)

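    # Conceptual sketch of the reconstruction above (illustrative only): the
    # delta chain yields one full text followed by deltas, so once `bins` is
    # read, the raw text boils down to:
    #
    #   basetext = bytes(bins[0])
    #   rawtext = mdiff.patches(basetext, bins[1:])
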
    def _sidedata(self, rev):
        """Return the sidedata for a given revision number."""
        index_entry = self.index[rev]
        sidedata_offset = index_entry[8]
        sidedata_size = index_entry[9]

        if self._inline:
            sidedata_offset += self.index.entry_size * (1 + rev)
        if sidedata_size == 0:
            return {}

        segment = self._getsegment(sidedata_offset, sidedata_size)
        sidedata = sidedatautil.deserialize_sidedata(segment)
        return sidedata

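    # Worked example (illustrative): in an inline revlog, index entries and
    # data share the .i file, so the stored sidedata offset must be shifted
    # by the index entries preceding it. With entry_size = 64 and rev = 2, a
    # stored offset of 100 maps to:
    #
    #   100 + 64 * (1 + 2) == 292    # actual position inside the .i file
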
    def rawdata(self, nodeorrev, _df=None):
        """return an uncompressed raw data of a given node or revision number.

        _df - an existing file handle to read from. (internal-only)
        """
        return self._revisiondata(nodeorrev, _df, raw=True)[0]

    def hash(self, text, p1, p2):
        """Compute a node hash.

        Available as a function so that subclasses can replace the hash
        as needed.
        """
        return storageutil.hashrevisionsha1(text, p1, p2)

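    # Sketch of the SHA-1 node computation (an assumption about what
    # storageutil.hashrevisionsha1 does: it sorts the parents so the node is
    # independent of parent order, then hashes parents plus text):
    #
    #   import hashlib
    #   a, b = sorted((p1, p2))
    #   node = hashlib.sha1(a + b + text).digest()
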
    def checkhash(self, text, node, p1=None, p2=None, rev=None):
        """Check node hash integrity.

        Available as a function so that subclasses can extend hash mismatch
        behaviors as needed.
        """
        try:
            if p1 is None and p2 is None:
                p1, p2 = self.parents(node)
            if node != self.hash(text, p1, p2):
                # Clear the revision cache on hash failure. The revision cache
                # only stores the raw revision and clearing the cache does have
                # the side-effect that we won't have a cache hit when the raw
                # revision data is accessed. But this case should be rare and
                # it is extra work to teach the cache about the hash
                # verification state.
                if self._revisioncache and self._revisioncache[0] == node:
                    self._revisioncache = None

                revornode = rev
                if revornode is None:
                    revornode = templatefilters.short(hex(node))
                raise error.RevlogError(
                    _(b"integrity check failed on %s:%s")
                    % (self.indexfile, pycompat.bytestr(revornode))
                )
        except error.RevlogError:
            if self._censorable and storageutil.iscensoredtext(text):
                raise error.CensoredNodeError(self.indexfile, node, text)
            raise

    def _enforceinlinesize(self, tr, fp=None):
        """Check if the revlog is too big for inline and convert if so.

        This should be called after revisions are added to the revlog. If the
        revlog has grown too large to be an inline revlog, it will convert it
        to use multiple index and data files.
        """
        tiprev = len(self) - 1
        if (
            not self._inline
            or (self.start(tiprev) + self.length(tiprev)) < _maxinline
        ):
            return

        troffset = tr.findoffset(self.indexfile)
        if troffset is None:
            raise error.RevlogError(
                _(b"%s not found in the transaction") % self.indexfile
            )
        trindex = 0
        tr.add(self.datafile, 0)

        if fp:
            fp.flush()
            fp.close()
            # We can't use the cached file handle after close(). So prevent
            # its usage.
            self._writinghandles = None

        with self._indexfp(b'r') as ifh, self._datafp(b'w') as dfh:
            for r in self:
                dfh.write(self._getsegmentforrevs(r, r, df=ifh)[1])
                if troffset <= self.start(r):
                    trindex = r

        with self._indexfp(b'w') as fp:
            self.version &= ~FLAG_INLINE_DATA
            self._inline = False
            for i in self:
                e = self.index.entry_binary(i)
                if i == 0:
                    header = self.index.pack_header(self.version)
                    e = header + e
                fp.write(e)

            # the temp file replaces the real index when we exit the context
            # manager

        tr.replace(self.indexfile, trindex * self.index.entry_size)
        nodemaputil.setup_persistent_nodemap(tr, self)
        self._chunkclear()

    def _nodeduplicatecallback(self, transaction, node):
        """called when trying to add a node already stored."""

    def addrevision(
        self,
        text,
        transaction,
        link,
        p1,
        p2,
        cachedelta=None,
        node=None,
        flags=REVIDX_DEFAULT_FLAGS,
        deltacomputer=None,
        sidedata=None,
    ):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        cachedelta - an optional precomputed delta
        node - nodeid of revision; typically node is not specified, and it is
        computed by default as hash(text, p1, p2), however subclasses might
        use a different hashing method (and override checkhash() in such a
        case)
        flags - the known flags to set on the revision
        deltacomputer - an optional deltacomputer instance shared between
        multiple calls
        """
        if link == nullrev:
            raise error.RevlogError(
                _(b"attempted to add linkrev -1 to %s") % self.indexfile
            )

        if sidedata is None:
            sidedata = {}
        elif sidedata and not self.hassidedata:
            raise error.ProgrammingError(
                _(b"trying to add sidedata to a revlog that doesn't support them")
            )

        if flags:
            node = node or self.hash(text, p1, p2)

        rawtext, validatehash = flagutil.processflagswrite(self, text, flags)

        # If the flag processor modifies the revision data, ignore any provided
        # cachedelta.
        if rawtext != text:
            cachedelta = None

        if len(rawtext) > _maxentrysize:
            raise error.RevlogError(
                _(
                    b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
                )
                % (self.indexfile, len(rawtext))
            )

        node = node or self.hash(rawtext, p1, p2)
        rev = self.index.get_rev(node)
        if rev is not None:
            return rev

        if validatehash:
            self.checkhash(rawtext, node, p1=p1, p2=p2)

        return self.addrawrevision(
            rawtext,
            transaction,
            link,
            p1,
            p2,
            node,
            flags,
            cachedelta=cachedelta,
            deltacomputer=deltacomputer,
            sidedata=sidedata,
        )

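    # Usage sketch (illustrative; `rlog`, `tr`, `linkrev` and the parent
    # nodes are assumed to exist): addrevision() returns the new revision
    # number, and re-adding identical content is a no-op returning the
    # existing revision:
    #
    #   rev = rlog.addrevision(b'contents', tr, linkrev, p1node, p2node)
    #   assert rlog.addrevision(b'contents', tr, linkrev, p1node, p2node) == rev
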
    def addrawrevision(
        self,
        rawtext,
        transaction,
        link,
        p1,
        p2,
        node,
        flags,
        cachedelta=None,
        deltacomputer=None,
        sidedata=None,
    ):
        """add a raw revision with known flags, node and parents
        useful when reusing a revision not stored in this revlog (ex: received
        over the wire, or read from an external bundle).
        """
        dfh = None
        if not self._inline:
            dfh = self._datafp(b"a+")
        ifh = self._indexfp(b"a+")
        try:
            return self._addrevision(
                node,
                rawtext,
                transaction,
                link,
                p1,
                p2,
                flags,
                cachedelta,
                ifh,
                dfh,
                deltacomputer=deltacomputer,
                sidedata=sidedata,
            )
        finally:
            if dfh:
                dfh.close()
            ifh.close()

    def compress(self, data):
        """Generate a possibly-compressed representation of data."""
        if not data:
            return b'', data

        compressed = self._compressor.compress(data)

        if compressed:
            # The revlog compressor added the header in the returned data.
            return b'', compressed

        if data[0:1] == b'\0':
            return b'', data
        return b'u', data

    def decompress(self, data):
        """Decompress a revlog chunk.

        The chunk is expected to begin with a header identifying the
        format type so it can be routed to an appropriate decompressor.
        """
        if not data:
            return data

        # Revlogs are read much more frequently than they are written and many
        # chunks only take microseconds to decompress, so performance is
        # important here.
        #
        # We can make a few assumptions about revlogs:
        #
        # 1) the majority of chunks will be compressed (as opposed to inline
        #    raw data).
        # 2) decompressing *any* data will likely be at least 10x slower than
        #    returning raw inline data.
        # 3) we want to prioritize common and officially supported compression
        #    engines
        #
        # It follows that we want to optimize for "decompress compressed data
        # when encoded with common and officially supported compression engines"
        # case over "raw data" and "data encoded by less common or non-official
        # compression engines." That is why we have the inline lookup first
        # followed by the compengines lookup.
        #
        # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
        # compressed chunks. And this matters for changelog and manifest reads.
        t = data[0:1]

        if t == b'x':
            try:
                return _zlibdecompress(data)
            except zlib.error as e:
                raise error.RevlogError(
                    _(b'revlog decompress error: %s')
                    % stringutil.forcebytestr(e)
                )
        # '\0' is more common than 'u' so it goes first.
        elif t == b'\0':
            return data
        elif t == b'u':
            return util.buffer(data, 1)

        try:
            compressor = self._decompressors[t]
        except KeyError:
            try:
                engine = util.compengines.forrevlogheader(t)
                compressor = engine.revlogcompressor(self._compengineopts)
                self._decompressors[t] = compressor
            except KeyError:
                raise error.RevlogError(
                    _(b'unknown compression type %s') % binascii.hexlify(t)
                )

        return compressor.decompress(data)

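    # Worked example of the one-byte chunk header used above (illustrative):
    #
    #   b'\0' + data  ->  stored raw, returned as-is by decompress()
    #   b'u'  + data  ->  stored uncompressed, decompress() strips the marker
    #   b'x'  + ...   ->  zlib stream (b'x' is the first byte of zlib headers)
    #
    # so compress() and decompress() always round-trip:
    #
    #   header, packed = rlog.compress(chunk)
    #   assert bytes(rlog.decompress(header + packed)) == chunk
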
    def _addrevision(
        self,
        node,
        rawtext,
        transaction,
        link,
        p1,
        p2,
        flags,
        cachedelta,
        ifh,
        dfh,
        alwayscache=False,
        deltacomputer=None,
        sidedata=None,
    ):
        """internal function to add revisions to the log

        see addrevision for argument descriptions.

        note: "addrevision" takes non-raw text, "_addrevision" takes raw text.

        if "deltacomputer" is not provided or None, a defaultdeltacomputer will
        be used.

        invariants:
        - rawtext is optional (can be None); if not set, cachedelta must be set.
          if both are set, they must correspond to each other.
        """
        if node == self.nullid:
            raise error.RevlogError(
                _(b"%s: attempt to add null revision") % self.indexfile
            )
        if (
            node == self.nodeconstants.wdirid
            or node in self.nodeconstants.wdirfilenodeids
        ):
            raise error.RevlogError(
                _(b"%s: attempt to add wdir revision") % self.indexfile
            )

        if self._inline:
            fh = ifh
        else:
            fh = dfh

        btext = [rawtext]

        curr = len(self)
        prev = curr - 1

        offset = self._get_data_offset(prev)

        if self._concurrencychecker:
            if self._inline:
                # offset is "as if" it were in the .d file, so we need to add on
                # the size of the entry metadata.
                self._concurrencychecker(
                    ifh, self.indexfile, offset + curr * self.index.entry_size
                )
            else:
                # Entries in the .i are a consistent size.
                self._concurrencychecker(
                    ifh, self.indexfile, curr * self.index.entry_size
                )
                self._concurrencychecker(dfh, self.datafile, offset)

        p1r, p2r = self.rev(p1), self.rev(p2)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if rawtext is None:
            # need rawtext size, before changed by flag processors, which is
            # the non-raw size. use revlog explicitly to avoid filelog's extra
            # logic that might remove metadata size.
            textlen = mdiff.patchedsize(
                revlog.size(self, cachedelta[0]), cachedelta[1]
            )
        else:
            textlen = len(rawtext)

        if deltacomputer is None:
            deltacomputer = deltautil.deltacomputer(self)

        revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)

        deltainfo = deltacomputer.finddeltainfo(revinfo, fh)

        if sidedata and self.version & 0xFFFF == REVLOGV2:
            serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
            sidedata_offset = offset + deltainfo.deltalen
        else:
            serialized_sidedata = b""
            # Don't store the offset if the sidedata is empty, that way
            # we can easily detect empty sidedata and they will be no different
            # than ones we manually add.
            sidedata_offset = 0

        e = (
            offset_type(offset, flags),
            deltainfo.deltalen,
            textlen,
            deltainfo.base,
            link,
            p1r,
            p2r,
            node,
            sidedata_offset,
            len(serialized_sidedata),
        )

        if self.version & 0xFFFF != REVLOGV2:
            e = e[:8]

        self.index.append(e)
        entry = self.index.entry_binary(curr)
        if curr == 0:
            header = self.index.pack_header(self.version)
            entry = header + entry
        self._writeentry(
            transaction,
            ifh,
            dfh,
            entry,
            deltainfo.data,
            link,
            offset,
            serialized_sidedata,
        )

        rawtext = btext[0]

        if alwayscache and rawtext is None:
            rawtext = deltacomputer.buildtext(revinfo, fh)

        if type(rawtext) == bytes:  # only accept immutable objects
            self._revisioncache = (node, curr, rawtext)
        self._chainbasecache[curr] = deltainfo.chainbase
        return curr

    def _get_data_offset(self, prev):
        """Returns the current offset in the (in-transaction) data file.
        Versions < 2 of the revlog can get this in O(1), while revlog v2 needs
        a docket file to store that information: since sidedata can be
        rewritten to the end of the data file within a transaction, you can
        have cases where, for example, rev `n` does not have sidedata while
        rev `n - 1` does, leading to `n - 1`'s sidedata being written after
        `n`'s data.

        TODO cache this in a docket file before getting out of experimental."""
        if self.version & 0xFFFF != REVLOGV2:
            return self.end(prev)

        offset = 0
        for rev, entry in enumerate(self.index):
            sidedata_end = entry[8] + entry[9]
            # Sidedata for a previous rev has potentially been written after
            # this rev's end, so take the max.
            offset = max(self.end(rev), offset, sidedata_end)
        return offset

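    # Worked example (illustrative): if rev 0's sidedata was rewritten past
    # rev 1's data, the scan above returns the true end of the data file
    # instead of self.end(prev):
    #
    #   rev 0: data [0, 10),  sidedata at offset 30, size 5  -> ends at 35
    #   rev 1: data [10, 30), no sidedata                    -> ends at 30
    #   offset == max(10, 35, 30, 0) == 35
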
    def _writeentry(
        self, transaction, ifh, dfh, entry, data, link, offset, sidedata
    ):
        # Files opened in a+ mode have inconsistent behavior on various
        # platforms. Windows requires that a file positioning call be made
        # when the file handle transitions between reads and writes. See
        # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
        # platforms, Python or the platform itself can be buggy. Some versions
        # of Solaris have been observed to not append at the end of the file
        # if the file was seeked to before the end. See issue4943 for more.
        #
        # We work around this issue by inserting a seek() before writing.
        # Note: This is likely not necessary on Python 3. However, because
        # the file handle is reused for reads and may be seeked there, we need
        # to be careful before changing this.
        ifh.seek(0, os.SEEK_END)
        if dfh:
            dfh.seek(0, os.SEEK_END)

        curr = len(self) - 1
        if not self._inline:
            transaction.add(self.datafile, offset)
            transaction.add(self.indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            if sidedata:
                dfh.write(sidedata)
            ifh.write(entry)
        else:
            offset += curr * self.index.entry_size
            transaction.add(self.indexfile, offset)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            if sidedata:
                ifh.write(sidedata)
            self._enforceinlinesize(transaction, ifh)
        nodemaputil.setup_persistent_nodemap(transaction, self)

    def addgroup(
        self,
        deltas,
        linkmapper,
        transaction,
        alwayscache=False,
        addrevisioncb=None,
        duplicaterevisioncb=None,
    ):
        """
        add a delta group

        given a set of deltas, add them to the revision log. the
        first delta is against its parent, which should be in our
        log, the rest are against the previous delta.

        If ``addrevisioncb`` is defined, it will be called with arguments of
        this revlog and the revision number that was added.
        """

        if self._writinghandles:
            raise error.ProgrammingError(b'cannot nest addgroup() calls')

        r = len(self)
        end = 0
        if r:
            end = self.end(r - 1)
        ifh = self._indexfp(b"a+")
        isize = r * self.index.entry_size
        if self._inline:
            transaction.add(self.indexfile, end + isize)
            dfh = None
        else:
            transaction.add(self.indexfile, isize)
            transaction.add(self.datafile, end)
            dfh = self._datafp(b"a+")

        def flush():
            if dfh:
                dfh.flush()
            ifh.flush()

        self._writinghandles = (ifh, dfh)
        empty = True

        try:
            deltacomputer = deltautil.deltacomputer(self)
            # loop through our set of deltas
            for data in deltas:
                node, p1, p2, linknode, deltabase, delta, flags, sidedata = data
                link = linkmapper(linknode)
                flags = flags or REVIDX_DEFAULT_FLAGS

                rev = self.index.get_rev(node)
                if rev is not None:
                    # this can happen if two branches make the same change
                    self._nodeduplicatecallback(transaction, rev)
                    if duplicaterevisioncb:
                        duplicaterevisioncb(self, rev)
                    empty = False
                    continue

                for p in (p1, p2):
                    if not self.index.has_node(p):
                        raise error.LookupError(
                            p, self.indexfile, _(b'unknown parent')
                        )

                if not self.index.has_node(deltabase):
                    raise error.LookupError(
                        deltabase, self.indexfile, _(b'unknown delta base')
                    )

                baserev = self.rev(deltabase)

                if baserev != nullrev and self.iscensored(baserev):
                    # if base is censored, delta must be full replacement in a
                    # single patch operation
                    hlen = struct.calcsize(b">lll")
                    oldlen = self.rawsize(baserev)
                    newlen = len(delta) - hlen
                    if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
                        raise error.CensoredBaseError(
                            self.indexfile, self.node(baserev)
                        )

                if not flags and self._peek_iscensored(baserev, delta, flush):
                    flags |= REVIDX_ISCENSORED

                # We assume consumers of addrevisioncb will want to retrieve
                # the added revision, which will require a call to
                # revision(). revision() will fast path if there is a cache
                # hit. So, we tell _addrevision() to always cache in this case.
                # We're only using addgroup() in the context of changegroup
                # generation so the revision data can always be handled as raw
                # by the flagprocessor.
                rev = self._addrevision(
                    node,
                    None,
                    transaction,
                    link,
                    p1,
                    p2,
                    flags,
                    (baserev, delta),
                    ifh,
                    dfh,
                    alwayscache=alwayscache,
                    deltacomputer=deltacomputer,
                    sidedata=sidedata,
                )

                if addrevisioncb:
                    addrevisioncb(self, rev)
                empty = False

                if not dfh and not self._inline:
                    # addrevision switched from inline to conventional
                    # reopen the index
                    ifh.close()
                    dfh = self._datafp(b"a+")
                    ifh = self._indexfp(b"a+")
                    self._writinghandles = (ifh, dfh)
        finally:
            self._writinghandles = None

        if dfh:
            dfh.close()
        ifh.close()
        return not empty

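    # Sketch of the expected shape of each `deltas` item (this mirrors the
    # unpacking in the loop above):
    #
    #   (node, p1, p2, linknode, deltabase, delta, flags, sidedata)
    #
    # A caller applying a changegroup might therefore do (illustrative;
    # `chunks`, `linkmapper` and `tr` are assumed):
    #
    #   seen = []
    #   added = rlog.addgroup(chunks, linkmapper, tr,
    #                         addrevisioncb=lambda rl, rev: seen.append(rev))
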
    def iscensored(self, rev):
        """Check if a file revision is censored."""
        if not self._censorable:
            return False

        return self.flags(rev) & REVIDX_ISCENSORED

    def _peek_iscensored(self, baserev, delta, flush):
        """Quickly check if a delta produces a censored revision."""
        if not self._censorable:
            return False

        return storageutil.deltaiscensored(delta, baserev, self.rawsize)

    def getstrippoint(self, minlink):
        """find the minimum rev that must be stripped to strip the linkrev

        Returns a tuple containing the minimum rev and a set of all revs that
        have linkrevs that will be broken by this strip.
        """
        return storageutil.resolvestripinfo(
            minlink,
            len(self) - 1,
            self.headrevs(),
            self.linkrev,
            self.parentrevs,
        )

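    # Illustrative example for strip() below: with per-revision linkrevs
    # [0, 1, 3, 2], getstrippoint(2) returns rev 2 as the first revision to
    # truncate, since rev 2 is the earliest revision whose linkrev (3) is
    # >= 2; truncating there removes revs 2 and 3 in one go.
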
    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        rev, _ = self.getstrippoint(minlink)
        if rev == len(self):
            return

        # first truncate the files on disk
        end = self.start(rev)
        if not self._inline:
            transaction.add(self.datafile, end)
            end = rev * self.index.entry_size
        else:
            end += rev * self.index.entry_size

        transaction.add(self.indexfile, end)

        # then reset internal state in memory to forget those revisions
        self._revisioncache = None
        self._chaininfocache = util.lrucachedict(500)
        self._chunkclear()

        del self.index[rev:-1]

    def checksize(self):
        """Check size of index and data files

        return a (dd, di) tuple.
        - dd: extra bytes for the "data" file
        - di: extra bytes for the "index" file

        A healthy revlog will return (0, 0).
        """
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            with self._datafp() as f:
                f.seek(0, io.SEEK_END)
                actual = f.tell()
            dd = actual - expected
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            dd = 0

        try:
            f = self.opener(self.indexfile)
            f.seek(0, io.SEEK_END)
            actual = f.tell()
            f.close()
            s = self.index.entry_size
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            di = 0

        return (dd, di)

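    # Interpretation sketch (illustrative): dd and di count trailing bytes
    # not accounted for by the index, e.g. leftovers from an interrupted
    # write, so a consistency check is simply:
    #
    #   dd, di = rlog.checksize()
    #   healthy = (dd, di) == (0, 0)
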
    def files(self):
        res = [self.indexfile]
        if not self._inline:
            res.append(self.datafile)
        return res

    def emitrevisions(
        self,
        nodes,
        nodesorder=None,
        revisiondata=False,
        assumehaveparentrevisions=False,
        deltamode=repository.CG_DELTAMODE_STD,
        sidedata_helpers=None,
    ):
        if nodesorder not in (b'nodes', b'storage', b'linear', None):
            raise error.ProgrammingError(
                b'unhandled value for nodesorder: %s' % nodesorder
            )

        if nodesorder is None and not self._generaldelta:
            nodesorder = b'storage'

        if (
            not self._storedeltachains
            and deltamode != repository.CG_DELTAMODE_PREV
        ):
            deltamode = repository.CG_DELTAMODE_FULL

        return storageutil.emitrevisions(
            self,
            nodes,
            nodesorder,
            revlogrevisiondelta,
            deltaparentfn=self.deltaparent,
            candeltafn=self.candelta,
            rawsizefn=self.rawsize,
            revdifffn=self.revdiff,
            flagsfn=self.flags,
            deltamode=deltamode,
            revisiondata=revisiondata,
            assumehaveparentrevisions=assumehaveparentrevisions,
            sidedata_helpers=sidedata_helpers,
        )

    DELTAREUSEALWAYS = b'always'
    DELTAREUSESAMEREVS = b'samerevs'
    DELTAREUSENEVER = b'never'

    DELTAREUSEFULLADD = b'fulladd'

    DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}

2654 def clone(
2655 def clone(
2655 self,
2656 self,
2656 tr,
2657 tr,
2657 destrevlog,
2658 destrevlog,
2658 addrevisioncb=None,
2659 addrevisioncb=None,
2659 deltareuse=DELTAREUSESAMEREVS,
2660 deltareuse=DELTAREUSESAMEREVS,
2660 forcedeltabothparents=None,
2661 forcedeltabothparents=None,
2661 sidedatacompanion=None,
2662 sidedata_helpers=None,
2662 ):
2663 ):
2663 """Copy this revlog to another, possibly with format changes.
2664 """Copy this revlog to another, possibly with format changes.
2664
2665
2665 The destination revlog will contain the same revisions and nodes.
2666 The destination revlog will contain the same revisions and nodes.
2666 However, it may not be bit-for-bit identical due to e.g. delta encoding
2667 However, it may not be bit-for-bit identical due to e.g. delta encoding
2667 differences.
2668 differences.
2668
2669
2669 The ``deltareuse`` argument control how deltas from the existing revlog
2670 The ``deltareuse`` argument control how deltas from the existing revlog
2670 are preserved in the destination revlog. The argument can have the
2671 are preserved in the destination revlog. The argument can have the
2671 following values:
2672 following values:
2672
2673
2673 DELTAREUSEALWAYS
2674 DELTAREUSEALWAYS
2674 Deltas will always be reused (if possible), even if the destination
2675 Deltas will always be reused (if possible), even if the destination
2675 revlog would not select the same revisions for the delta. This is the
2676 revlog would not select the same revisions for the delta. This is the
2676 fastest mode of operation.
2677 fastest mode of operation.
2677 DELTAREUSESAMEREVS
2678 DELTAREUSESAMEREVS
2678 Deltas will be reused if the destination revlog would pick the same
2679 Deltas will be reused if the destination revlog would pick the same
2679 revisions for the delta. This mode strikes a balance between speed
2680 revisions for the delta. This mode strikes a balance between speed
2680 and optimization.
2681 and optimization.
2681 DELTAREUSENEVER
2682 DELTAREUSENEVER
2682 Deltas will never be reused. This is the slowest mode of execution.
2683 Deltas will never be reused. This is the slowest mode of execution.
2683 This mode can be used to recompute deltas (e.g. if the diff/delta
2684 This mode can be used to recompute deltas (e.g. if the diff/delta
2684 algorithm changes).
2685 algorithm changes).
2685 DELTAREUSEFULLADD
2686 DELTAREUSEFULLADD
2686 Revision will be re-added as if their were new content. This is
2687 Revision will be re-added as if their were new content. This is
2687 slower than DELTAREUSEALWAYS but allow more mechanism to kicks in.
2688 slower than DELTAREUSEALWAYS but allow more mechanism to kicks in.
2688 eg: large file detection and handling.
2689 eg: large file detection and handling.
2689
2690
2690 Delta computation can be slow, so the choice of delta reuse policy can
2691 Delta computation can be slow, so the choice of delta reuse policy can
2691 significantly affect run time.
2692 significantly affect run time.
2692
2693
2693 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2694 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2694 two extremes. Deltas will be reused if they are appropriate. But if the
2695 two extremes. Deltas will be reused if they are appropriate. But if the
2695 delta could choose a better revision, it will do so. This means if you
2696 delta could choose a better revision, it will do so. This means if you
2696 are converting a non-generaldelta revlog to a generaldelta revlog,
2697 are converting a non-generaldelta revlog to a generaldelta revlog,
2697 deltas will be recomputed if the delta's parent isn't a parent of the
2698 deltas will be recomputed if the delta's parent isn't a parent of the
2698 revision.
2699 revision.
2699
2700
2700 In addition to the delta policy, the ``forcedeltabothparents``
2701 In addition to the delta policy, the ``forcedeltabothparents``
2701 argument controls whether to force computing deltas against both parents
2702 argument controls whether to force computing deltas against both parents
2702 for merges. When left unset, the destination revlog's current setting is used.
2703 for merges. When left unset, the destination revlog's current setting is used.
2703
2704
2704 If not None, the `sidedatacompanion` is a callable that accepts two
2705 See `storageutil.emitrevisions` for documentation on `sidedata_helpers`.
2705 arguments:
2706
2707 (srcrevlog, rev)
2708
2709 and returns a quintuple that controls changes to sidedata content from the
2710 old revision to the new clone result:
2711
2712 (dropall, filterout, update, new_flags, dropped_flags)
2713
2714 * if `dropall` is True, all sidedata should be dropped
2715 * `filterout` is a set of sidedata keys that should be dropped
2716 * `update` is a mapping of additional/new key -> value
2717 * `new_flags` is a bitfield of new flags that the revision should get
2718 * `dropped_flags` is a bitfield of flags that the revision should no longer have
2719 """
2706 """
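# Illustrative sketch (editor's addition, not part of the original change):
# how a caller might drive this API. `src`, `dst` and `tr` are hypothetical
# stand-ins for a source revlog, an empty destination revlog and an open
# transaction.
#
#     src.clone(
#         tr,
#         dst,
#         deltareuse=src.DELTAREUSESAMEREVS,
#         sidedata_helpers=None,
#     )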
2720 if deltareuse not in self.DELTAREUSEALL:
2707 if deltareuse not in self.DELTAREUSEALL:
2721 raise ValueError(
2708 raise ValueError(
2722 _(b'value for deltareuse invalid: %s') % deltareuse
2709 _(b'value for deltareuse invalid: %s') % deltareuse
2723 )
2710 )
2724
2711
2725 if len(destrevlog):
2712 if len(destrevlog):
2726 raise ValueError(_(b'destination revlog is not empty'))
2713 raise ValueError(_(b'destination revlog is not empty'))
2727
2714
2728 if getattr(self, 'filteredrevs', None):
2715 if getattr(self, 'filteredrevs', None):
2729 raise ValueError(_(b'source revlog has filtered revisions'))
2716 raise ValueError(_(b'source revlog has filtered revisions'))
2730 if getattr(destrevlog, 'filteredrevs', None):
2717 if getattr(destrevlog, 'filteredrevs', None):
2731 raise ValueError(_(b'destination revlog has filtered revisions'))
2718 raise ValueError(_(b'destination revlog has filtered revisions'))
2732
2719
2733 # lazydelta and lazydeltabase control whether to reuse a cached delta,
2720 # lazydelta and lazydeltabase control whether to reuse a cached delta,
2734 # if possible.
2721 # if possible.
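# Editor's note: how each reuse policy maps onto the two knobs set below:
#   DELTAREUSEALWAYS   -> lazydelta=True,  lazydeltabase=True
#   DELTAREUSESAMEREVS -> lazydelta=True,  lazydeltabase=False
#   DELTAREUSENEVER    -> lazydelta=False, lazydeltabase=False
# DELTAREUSEFULLADD leaves both knobs at their current values.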
2735 oldlazydelta = destrevlog._lazydelta
2722 oldlazydelta = destrevlog._lazydelta
2736 oldlazydeltabase = destrevlog._lazydeltabase
2723 oldlazydeltabase = destrevlog._lazydeltabase
2737 oldamd = destrevlog._deltabothparents
2724 oldamd = destrevlog._deltabothparents
2738
2725
2739 try:
2726 try:
2740 if deltareuse == self.DELTAREUSEALWAYS:
2727 if deltareuse == self.DELTAREUSEALWAYS:
2741 destrevlog._lazydeltabase = True
2728 destrevlog._lazydeltabase = True
2742 destrevlog._lazydelta = True
2729 destrevlog._lazydelta = True
2743 elif deltareuse == self.DELTAREUSESAMEREVS:
2730 elif deltareuse == self.DELTAREUSESAMEREVS:
2744 destrevlog._lazydeltabase = False
2731 destrevlog._lazydeltabase = False
2745 destrevlog._lazydelta = True
2732 destrevlog._lazydelta = True
2746 elif deltareuse == self.DELTAREUSENEVER:
2733 elif deltareuse == self.DELTAREUSENEVER:
2747 destrevlog._lazydeltabase = False
2734 destrevlog._lazydeltabase = False
2748 destrevlog._lazydelta = False
2735 destrevlog._lazydelta = False
2749
2736
2750 destrevlog._deltabothparents = forcedeltabothparents or oldamd
2737 destrevlog._deltabothparents = forcedeltabothparents or oldamd
2751
2738
2752 self._clone(
2739 self._clone(
2753 tr,
2740 tr,
2754 destrevlog,
2741 destrevlog,
2755 addrevisioncb,
2742 addrevisioncb,
2756 deltareuse,
2743 deltareuse,
2757 forcedeltabothparents,
2744 forcedeltabothparents,
2758 sidedatacompanion,
2745 sidedata_helpers,
2759 )
2746 )
2760
2747
2761 finally:
2748 finally:
2762 destrevlog._lazydelta = oldlazydelta
2749 destrevlog._lazydelta = oldlazydelta
2763 destrevlog._lazydeltabase = oldlazydeltabase
2750 destrevlog._lazydeltabase = oldlazydeltabase
2764 destrevlog._deltabothparents = oldamd
2751 destrevlog._deltabothparents = oldamd
2765
2752
2766 def _clone(
2753 def _clone(
2767 self,
2754 self,
2768 tr,
2755 tr,
2769 destrevlog,
2756 destrevlog,
2770 addrevisioncb,
2757 addrevisioncb,
2771 deltareuse,
2758 deltareuse,
2772 forcedeltabothparents,
2759 forcedeltabothparents,
2773 sidedatacompanion,
2760 sidedata_helpers,
2774 ):
2761 ):
2775 """perform the core duty of `revlog.clone` after parameter processing"""
2762 """perform the core duty of `revlog.clone` after parameter processing"""
2776 deltacomputer = deltautil.deltacomputer(destrevlog)
2763 deltacomputer = deltautil.deltacomputer(destrevlog)
2777 index = self.index
2764 index = self.index
2778 for rev in self:
2765 for rev in self:
2779 entry = index[rev]
2766 entry = index[rev]
2780
2767
2781 # Some classes override linkrev to take filtered revs into
2768 # Some classes override linkrev to take filtered revs into
2782 # account. Use raw entry from index.
2769 # account. Use raw entry from index.
2783 flags = entry[0] & 0xFFFF
2770 flags = entry[0] & 0xFFFF
2784 linkrev = entry[4]
2771 linkrev = entry[4]
2785 p1 = index[entry[5]][7]
2772 p1 = index[entry[5]][7]
2786 p2 = index[entry[6]][7]
2773 p2 = index[entry[6]][7]
2787 node = entry[7]
2774 node = entry[7]
2788
2775
2789 sidedataactions = (False, [], {}, 0, 0)
2790 if sidedatacompanion is not None:
2791 sidedataactions = sidedatacompanion(self, rev)
2792
2793 # (Possibly) reuse the delta from the revlog if allowed and
2776 # (Possibly) reuse the delta from the revlog if allowed and
2794 # the revlog chunk is a delta.
2777 # the revlog chunk is a delta.
2795 cachedelta = None
2778 cachedelta = None
2796 rawtext = None
2779 rawtext = None
2797 if any(sidedataactions) or deltareuse == self.DELTAREUSEFULLADD:
2780 if deltareuse == self.DELTAREUSEFULLADD:
2798 dropall = sidedataactions[0]
2799 filterout = sidedataactions[1]
2800 update = sidedataactions[2]
2801 new_flags = sidedataactions[3]
2802 dropped_flags = sidedataactions[4]
2803 text, sidedata = self._revisiondata(rev)
2781 text, sidedata = self._revisiondata(rev)
2804 if dropall:
2782
2805 sidedata = {}
2783 if sidedata_helpers is not None:
2806 for key in filterout:
2784 (sidedata, new_flags) = storageutil.run_sidedata_helpers(
2807 sidedata.pop(key, None)
2785 self, sidedata_helpers, sidedata, rev
2808 sidedata.update(update)
2786 )
2809 if not sidedata:
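# Editor's note: `run_sidedata_helpers` returns the new sidedata plus a
# (flags to add, flags to remove) pair; the next line adds the former and
# clears the latter.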
2787 flags = (flags | new_flags[0]) & ~new_flags[1]
2810 sidedata = None
2811
2812 flags |= new_flags
2813 flags &= ~dropped_flags
2814
2788
2815 destrevlog.addrevision(
2789 destrevlog.addrevision(
2816 text,
2790 text,
2817 tr,
2791 tr,
2818 linkrev,
2792 linkrev,
2819 p1,
2793 p1,
2820 p2,
2794 p2,
2821 cachedelta=cachedelta,
2795 cachedelta=cachedelta,
2822 node=node,
2796 node=node,
2823 flags=flags,
2797 flags=flags,
2824 deltacomputer=deltacomputer,
2798 deltacomputer=deltacomputer,
2825 sidedata=sidedata,
2799 sidedata=sidedata,
2826 )
2800 )
2827 else:
2801 else:
2828 if destrevlog._lazydelta:
2802 if destrevlog._lazydelta:
2829 dp = self.deltaparent(rev)
2803 dp = self.deltaparent(rev)
2830 if dp != nullrev:
2804 if dp != nullrev:
2831 cachedelta = (dp, bytes(self._chunk(rev)))
2805 cachedelta = (dp, bytes(self._chunk(rev)))
2832
2806
2807 sidedata = None
2833 if not cachedelta:
2808 if not cachedelta:
2834 rawtext = self.rawdata(rev)
2809 rawtext, sidedata = self._revisiondata(rev)
2810 if sidedata is None:
2811 sidedata = self.sidedata(rev)
2812
2813 if sidedata_helpers is not None:
2814 (sidedata, new_flags) = storageutil.run_sidedata_helpers(
2815 self, sidedata_helpers, sidedata, rev
2816 )
2817 flags = (flags | new_flags[0]) & ~new_flags[1]
2835
2818
2836 ifh = destrevlog.opener(
2819 ifh = destrevlog.opener(
2837 destrevlog.indexfile, b'a+', checkambig=False
2820 destrevlog.indexfile, b'a+', checkambig=False
2838 )
2821 )
2839 dfh = None
2822 dfh = None
2840 if not destrevlog._inline:
2823 if not destrevlog._inline:
2841 dfh = destrevlog.opener(destrevlog.datafile, b'a+')
2824 dfh = destrevlog.opener(destrevlog.datafile, b'a+')
2842 try:
2825 try:
2843 destrevlog._addrevision(
2826 destrevlog._addrevision(
2844 node,
2827 node,
2845 rawtext,
2828 rawtext,
2846 tr,
2829 tr,
2847 linkrev,
2830 linkrev,
2848 p1,
2831 p1,
2849 p2,
2832 p2,
2850 flags,
2833 flags,
2851 cachedelta,
2834 cachedelta,
2852 ifh,
2835 ifh,
2853 dfh,
2836 dfh,
2854 deltacomputer=deltacomputer,
2837 deltacomputer=deltacomputer,
2838 sidedata=sidedata,
2855 )
2839 )
2856 finally:
2840 finally:
2857 if dfh:
2841 if dfh:
2858 dfh.close()
2842 dfh.close()
2859 ifh.close()
2843 ifh.close()
2860
2844
2861 if addrevisioncb:
2845 if addrevisioncb:
2862 addrevisioncb(self, rev, node)
2846 addrevisioncb(self, rev, node)
2863
2847
2864 def censorrevision(self, tr, censornode, tombstone=b''):
2848 def censorrevision(self, tr, censornode, tombstone=b''):
2865 if (self.version & 0xFFFF) == REVLOGV0:
2849 if (self.version & 0xFFFF) == REVLOGV0:
2866 raise error.RevlogError(
2850 raise error.RevlogError(
2867 _(b'cannot censor with version %d revlogs') % self.version
2851 _(b'cannot censor with version %d revlogs') % self.version
2868 )
2852 )
2869
2853
2870 censorrev = self.rev(censornode)
2854 censorrev = self.rev(censornode)
2871 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
2855 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
2872
2856
2873 if len(tombstone) > self.rawsize(censorrev):
2857 if len(tombstone) > self.rawsize(censorrev):
2874 raise error.Abort(
2858 raise error.Abort(
2875 _(b'censor tombstone must be no longer than censored data')
2859 _(b'censor tombstone must be no longer than censored data')
2876 )
2860 )
2877
2861
2878 # Rewriting the revlog in place is hard. Our strategy for censoring is
2862 # Rewriting the revlog in place is hard. Our strategy for censoring is
2879 # to create a new revlog, copy all revisions to it, then replace the
2863 # to create a new revlog, copy all revisions to it, then replace the
2880 # revlogs on transaction close.
2864 # revlogs on transaction close.
2881
2865
2882 newindexfile = self.indexfile + b'.tmpcensored'
2866 newindexfile = self.indexfile + b'.tmpcensored'
2883 newdatafile = self.datafile + b'.tmpcensored'
2867 newdatafile = self.datafile + b'.tmpcensored'
2884
2868
2885 # This is a bit dangerous. We could easily have a mismatch of state.
2869 # This is a bit dangerous. We could easily have a mismatch of state.
2886 newrl = revlog(
2870 newrl = revlog(
2887 self.opener,
2871 self.opener,
2888 target=self.target,
2872 target=self.target,
2889 indexfile=newindexfile,
2873 indexfile=newindexfile,
2890 datafile=newdatafile,
2874 datafile=newdatafile,
2891 censorable=True,
2875 censorable=True,
2892 )
2876 )
2893 newrl.version = self.version
2877 newrl.version = self.version
2894 newrl._generaldelta = self._generaldelta
2878 newrl._generaldelta = self._generaldelta
2895 newrl._parse_index = self._parse_index
2879 newrl._parse_index = self._parse_index
2896
2880
2897 for rev in self.revs():
2881 for rev in self.revs():
2898 node = self.node(rev)
2882 node = self.node(rev)
2899 p1, p2 = self.parents(node)
2883 p1, p2 = self.parents(node)
2900
2884
2901 if rev == censorrev:
2885 if rev == censorrev:
2902 newrl.addrawrevision(
2886 newrl.addrawrevision(
2903 tombstone,
2887 tombstone,
2904 tr,
2888 tr,
2905 self.linkrev(censorrev),
2889 self.linkrev(censorrev),
2906 p1,
2890 p1,
2907 p2,
2891 p2,
2908 censornode,
2892 censornode,
2909 REVIDX_ISCENSORED,
2893 REVIDX_ISCENSORED,
2910 )
2894 )
2911
2895
2912 if newrl.deltaparent(rev) != nullrev:
2896 if newrl.deltaparent(rev) != nullrev:
2913 raise error.Abort(
2897 raise error.Abort(
2914 _(
2898 _(
2915 b'censored revision stored as delta; '
2899 b'censored revision stored as delta; '
2916 b'cannot censor'
2900 b'cannot censor'
2917 ),
2901 ),
2918 hint=_(
2902 hint=_(
2919 b'censoring of revlogs is not '
2903 b'censoring of revlogs is not '
2920 b'fully implemented; please report '
2904 b'fully implemented; please report '
2921 b'this bug'
2905 b'this bug'
2922 ),
2906 ),
2923 )
2907 )
2924 continue
2908 continue
2925
2909
2926 if self.iscensored(rev):
2910 if self.iscensored(rev):
2927 if self.deltaparent(rev) != nullrev:
2911 if self.deltaparent(rev) != nullrev:
2928 raise error.Abort(
2912 raise error.Abort(
2929 _(
2913 _(
2930 b'cannot censor due to censored '
2914 b'cannot censor due to censored '
2931 b'revision having delta stored'
2915 b'revision having delta stored'
2932 )
2916 )
2933 )
2917 )
2934 rawtext = self._chunk(rev)
2918 rawtext = self._chunk(rev)
2935 else:
2919 else:
2936 rawtext = self.rawdata(rev)
2920 rawtext = self.rawdata(rev)
2937
2921
2938 newrl.addrawrevision(
2922 newrl.addrawrevision(
2939 rawtext, tr, self.linkrev(rev), p1, p2, node, self.flags(rev)
2923 rawtext, tr, self.linkrev(rev), p1, p2, node, self.flags(rev)
2940 )
2924 )
2941
2925
2942 tr.addbackup(self.indexfile, location=b'store')
2926 tr.addbackup(self.indexfile, location=b'store')
2943 if not self._inline:
2927 if not self._inline:
2944 tr.addbackup(self.datafile, location=b'store')
2928 tr.addbackup(self.datafile, location=b'store')
2945
2929
2946 self.opener.rename(newrl.indexfile, self.indexfile)
2930 self.opener.rename(newrl.indexfile, self.indexfile)
2947 if not self._inline:
2931 if not self._inline:
2948 self.opener.rename(newrl.datafile, self.datafile)
2932 self.opener.rename(newrl.datafile, self.datafile)
2949
2933
2950 self.clearcaches()
2934 self.clearcaches()
2951 self._loadindex()
2935 self._loadindex()
2952
2936
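# Illustrative usage sketch (editor's addition; `repo` and `bad_node` are
# hypothetical stand-ins): censoring a filelog revision inside a transaction.
#
#     with repo.lock(), repo.transaction(b'censor') as tr:
#         fl = repo.file(b'path/to/secret')
#         fl.censorrevision(tr, bad_node, tombstone=b'removed by admin')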
2953 def verifyintegrity(self, state):
2937 def verifyintegrity(self, state):
2954 """Verifies the integrity of the revlog.
2938 """Verifies the integrity of the revlog.
2955
2939
2956 Yields ``revlogproblem`` instances describing problems that are
2940 Yields ``revlogproblem`` instances describing problems that are
2957 found.
2941 found.
2958 """
2942 """
2959 dd, di = self.checksize()
2943 dd, di = self.checksize()
2960 if dd:
2944 if dd:
2961 yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
2945 yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
2962 if di:
2946 if di:
2963 yield revlogproblem(error=_(b'index contains %d extra bytes') % di)
2947 yield revlogproblem(error=_(b'index contains %d extra bytes') % di)
2964
2948
2965 version = self.version & 0xFFFF
2949 version = self.version & 0xFFFF
2966
2950
2967 # The verifier tells us what version revlog we should be.
2951 # The verifier tells us what version revlog we should be.
2968 if version != state[b'expectedversion']:
2952 if version != state[b'expectedversion']:
2969 yield revlogproblem(
2953 yield revlogproblem(
2970 warning=_(b"warning: '%s' uses revlog format %d; expected %d")
2954 warning=_(b"warning: '%s' uses revlog format %d; expected %d")
2971 % (self.indexfile, version, state[b'expectedversion'])
2955 % (self.indexfile, version, state[b'expectedversion'])
2972 )
2956 )
2973
2957
2974 state[b'skipread'] = set()
2958 state[b'skipread'] = set()
2975 state[b'safe_renamed'] = set()
2959 state[b'safe_renamed'] = set()
2976
2960
2977 for rev in self:
2961 for rev in self:
2978 node = self.node(rev)
2962 node = self.node(rev)
2979
2963
2980 # Verify contents. 4 cases to care about:
2964 # Verify contents. 4 cases to care about:
2981 #
2965 #
2982 # common: the most common case
2966 # common: the most common case
2983 # rename: with a rename
2967 # rename: with a rename
2984 # meta: file content starts with b'\1\n', the metadata
2968 # meta: file content starts with b'\1\n', the metadata
2985 # header defined in filelog.py, but without a rename
2969 # header defined in filelog.py, but without a rename
2986 # ext: content stored externally
2970 # ext: content stored externally
2987 #
2971 #
2988 # More formally, their differences are shown below:
2972 # More formally, their differences are shown below:
2989 #
2973 #
2990 # | common | rename | meta | ext
2974 # | common | rename | meta | ext
2991 # -------------------------------------------------------
2975 # -------------------------------------------------------
2992 # flags() | 0 | 0 | 0 | not 0
2976 # flags() | 0 | 0 | 0 | not 0
2993 # renamed() | False | True | False | ?
2977 # renamed() | False | True | False | ?
2994 # rawtext[0:2]=='\1\n'| False | True | True | ?
2978 # rawtext[0:2]=='\1\n'| False | True | True | ?
2995 #
2979 #
2996 # "rawtext" means the raw text stored in revlog data, which
2980 # "rawtext" means the raw text stored in revlog data, which
2997 # could be retrieved by "rawdata(rev)". "text"
2981 # could be retrieved by "rawdata(rev)". "text"
2998 # mentioned below is "revision(rev)".
2982 # mentioned below is "revision(rev)".
2999 #
2983 #
3000 # There are 3 different lengths stored physically:
2984 # There are 3 different lengths stored physically:
3001 # 1. L1: rawsize, stored in revlog index
2985 # 1. L1: rawsize, stored in revlog index
3002 # 2. L2: len(rawtext), stored in revlog data
2986 # 2. L2: len(rawtext), stored in revlog data
3003 # 3. L3: len(text), stored in revlog data if flags==0, or
2987 # 3. L3: len(text), stored in revlog data if flags==0, or
3004 # possibly somewhere else if flags!=0
2988 # possibly somewhere else if flags!=0
3005 #
2989 #
3006 # L1 should be equal to L2. L3 could be different from them.
2990 # L1 should be equal to L2. L3 could be different from them.
3007 # "text" may or may not affect commit hash depending on flag
2991 # "text" may or may not affect commit hash depending on flag
3008 # processors (see flagutil.addflagprocessor).
2992 # processors (see flagutil.addflagprocessor).
3009 #
2993 #
3010 # | common | rename | meta | ext
2994 # | common | rename | meta | ext
3011 # -------------------------------------------------
2995 # -------------------------------------------------
3012 # rawsize() | L1 | L1 | L1 | L1
2996 # rawsize() | L1 | L1 | L1 | L1
3013 # size() | L1 | L2-LM | L1(*) | L1 (?)
2997 # size() | L1 | L2-LM | L1(*) | L1 (?)
3014 # len(rawtext) | L2 | L2 | L2 | L2
2998 # len(rawtext) | L2 | L2 | L2 | L2
3015 # len(text) | L2 | L2 | L2 | L3
2999 # len(text) | L2 | L2 | L2 | L3
3016 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
3000 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
3017 #
3001 #
3018 # LM: length of metadata, depending on rawtext
3002 # LM: length of metadata, depending on rawtext
3019 # (*): not ideal, see comment in filelog.size
3003 # (*): not ideal, see comment in filelog.size
3020 # (?): could be "- len(meta)" if the resolved content has
3004 # (?): could be "- len(meta)" if the resolved content has
3021 # rename metadata
3005 # rename metadata
3022 #
3006 #
3023 # Checks needed to be done:
3007 # Checks needed to be done:
3024 # 1. length check: L1 == L2, in all cases.
3008 # 1. length check: L1 == L2, in all cases.
3025 # 2. hash check: depending on flag processor, we may need to
3009 # 2. hash check: depending on flag processor, we may need to
3026 # use either "text" (external), or "rawtext" (in revlog).
3010 # use either "text" (external), or "rawtext" (in revlog).
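# Editor's note, an illustrative rawtext carrying rename metadata (the
# b'\1\n'-delimited header from filelog.py; values are hypothetical):
#
#     b'\x01\ncopy: old/name.txt\ncopyrev: <40 hex digits>\n\x01\n<content>'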
3027
3011
3028 try:
3012 try:
3029 skipflags = state.get(b'skipflags', 0)
3013 skipflags = state.get(b'skipflags', 0)
3030 if skipflags:
3014 if skipflags:
3031 skipflags &= self.flags(rev)
3015 skipflags &= self.flags(rev)
3032
3016
3033 _verify_revision(self, skipflags, state, node)
3017 _verify_revision(self, skipflags, state, node)
3034
3018
3035 l1 = self.rawsize(rev)
3019 l1 = self.rawsize(rev)
3036 l2 = len(self.rawdata(node))
3020 l2 = len(self.rawdata(node))
3037
3021
3038 if l1 != l2:
3022 if l1 != l2:
3039 yield revlogproblem(
3023 yield revlogproblem(
3040 error=_(b'unpacked size is %d, %d expected') % (l2, l1),
3024 error=_(b'unpacked size is %d, %d expected') % (l2, l1),
3041 node=node,
3025 node=node,
3042 )
3026 )
3043
3027
3044 except error.CensoredNodeError:
3028 except error.CensoredNodeError:
3045 if state[b'erroroncensored']:
3029 if state[b'erroroncensored']:
3046 yield revlogproblem(
3030 yield revlogproblem(
3047 error=_(b'censored file data'), node=node
3031 error=_(b'censored file data'), node=node
3048 )
3032 )
3049 state[b'skipread'].add(node)
3033 state[b'skipread'].add(node)
3050 except Exception as e:
3034 except Exception as e:
3051 yield revlogproblem(
3035 yield revlogproblem(
3052 error=_(b'unpacking %s: %s')
3036 error=_(b'unpacking %s: %s')
3053 % (short(node), stringutil.forcebytestr(e)),
3037 % (short(node), stringutil.forcebytestr(e)),
3054 node=node,
3038 node=node,
3055 )
3039 )
3056 state[b'skipread'].add(node)
3040 state[b'skipread'].add(node)
3057
3041
3058 def storageinfo(
3042 def storageinfo(
3059 self,
3043 self,
3060 exclusivefiles=False,
3044 exclusivefiles=False,
3061 sharedfiles=False,
3045 sharedfiles=False,
3062 revisionscount=False,
3046 revisionscount=False,
3063 trackedsize=False,
3047 trackedsize=False,
3064 storedsize=False,
3048 storedsize=False,
3065 ):
3049 ):
3066 d = {}
3050 d = {}
3067
3051
3068 if exclusivefiles:
3052 if exclusivefiles:
3069 d[b'exclusivefiles'] = [(self.opener, self.indexfile)]
3053 d[b'exclusivefiles'] = [(self.opener, self.indexfile)]
3070 if not self._inline:
3054 if not self._inline:
3071 d[b'exclusivefiles'].append((self.opener, self.datafile))
3055 d[b'exclusivefiles'].append((self.opener, self.datafile))
3072
3056
3073 if sharedfiles:
3057 if sharedfiles:
3074 d[b'sharedfiles'] = []
3058 d[b'sharedfiles'] = []
3075
3059
3076 if revisionscount:
3060 if revisionscount:
3077 d[b'revisionscount'] = len(self)
3061 d[b'revisionscount'] = len(self)
3078
3062
3079 if trackedsize:
3063 if trackedsize:
3080 d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))
3064 d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))
3081
3065
3082 if storedsize:
3066 if storedsize:
3083 d[b'storedsize'] = sum(
3067 d[b'storedsize'] = sum(
3084 self.opener.stat(path).st_size for path in self.files()
3068 self.opener.stat(path).st_size for path in self.files()
3085 )
3069 )
3086
3070
3087 return d
3071 return d
3088
3072
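# Illustrative sketch (editor's addition): querying a revlog for storage
# statistics; `rl` is a hypothetical revlog instance.
#
#     info = rl.storageinfo(revisionscount=True, storedsize=True)
#     # -> {b'revisionscount': <int>, b'storedsize': <bytes on disk>}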
3089 def rewrite_sidedata(self, helpers, startrev, endrev):
3073 def rewrite_sidedata(self, helpers, startrev, endrev):
3090 if self.version & 0xFFFF != REVLOGV2:
3074 if self.version & 0xFFFF != REVLOGV2:
3091 return
3075 return
3092 # inline revlogs are not yet supported because they suffer from an issue when
3076 # inline revlogs are not yet supported because they suffer from an issue when
3093 # rewriting them (since it's not an append-only operation).
3077 # rewriting them (since it's not an append-only operation).
3094 # See issue6485.
3078 # See issue6485.
3095 assert not self._inline
3079 assert not self._inline
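# Editor's note: `helpers` is assumed to be the 3-tuple documented in
# `storageutil.emitrevisions`, i.e. (repo, sidedata_computers,
# sidedata_removers); indices 1 and 2 are consulted below.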
3096 if not helpers[1] and not helpers[2]:
3080 if not helpers[1] and not helpers[2]:
3097 # Nothing to generate or remove
3081 # Nothing to generate or remove
3098 return
3082 return
3099
3083
3100 new_entries = []
3084 new_entries = []
3101 # append the new sidedata
3085 # append the new sidedata
3102 with self._datafp(b'a+') as fp:
3086 with self._datafp(b'a+') as fp:
3103 # Maybe this bug still exists, see revlog._writeentry
3087 # Maybe this bug still exists, see revlog._writeentry
3104 fp.seek(0, os.SEEK_END)
3088 fp.seek(0, os.SEEK_END)
3105 current_offset = fp.tell()
3089 current_offset = fp.tell()
3106 for rev in range(startrev, endrev + 1):
3090 for rev in range(startrev, endrev + 1):
3107 entry = self.index[rev]
3091 entry = self.index[rev]
3108 new_sidedata, flags = storageutil.run_sidedata_helpers(
3092 new_sidedata, flags = storageutil.run_sidedata_helpers(
3109 store=self,
3093 store=self,
3110 sidedata_helpers=helpers,
3094 sidedata_helpers=helpers,
3111 sidedata={},
3095 sidedata={},
3112 rev=rev,
3096 rev=rev,
3113 )
3097 )
3114
3098
3115 serialized_sidedata = sidedatautil.serialize_sidedata(
3099 serialized_sidedata = sidedatautil.serialize_sidedata(
3116 new_sidedata
3100 new_sidedata
3117 )
3101 )
3118 if entry[8] != 0 or entry[9] != 0:
3102 if entry[8] != 0 or entry[9] != 0:
3119 # rewriting entries that already have sidedata is not
3103 # rewriting entries that already have sidedata is not
3120 # supported yet, because it introduces garbage data in the
3104 # supported yet, because it introduces garbage data in the
3121 # revlog.
3105 # revlog.
3122 msg = b"Rewriting existing sidedata is not supported yet"
3106 msg = b"Rewriting existing sidedata is not supported yet"
3123 raise error.Abort(msg)
3107 raise error.Abort(msg)
3124
3108
3125 # Apply (potential) flags to add and to remove after running
3109 # Apply (potential) flags to add and to remove after running
3126 # the sidedata helpers
3110 # the sidedata helpers
3127 new_offset_flags = (entry[0] | flags[0]) & ~flags[1]
3111 new_offset_flags = (entry[0] | flags[0]) & ~flags[1]
3128 entry = (new_offset_flags,) + entry[1:8]
3112 entry = (new_offset_flags,) + entry[1:8]
3129 entry += (current_offset, len(serialized_sidedata))
3113 entry += (current_offset, len(serialized_sidedata))
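# Editor's note: in a REVLOGV2 index entry, fields 8 and 9 hold the
# sidedata offset and length; they are updated here to point at the
# freshly appended blob.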
3130
3114
3131 fp.write(serialized_sidedata)
3115 fp.write(serialized_sidedata)
3132 new_entries.append(entry)
3116 new_entries.append(entry)
3133 current_offset += len(serialized_sidedata)
3117 current_offset += len(serialized_sidedata)
3134
3118
3135 # rewrite the new index entries
3119 # rewrite the new index entries
3136 with self._indexfp(b'w+') as fp:
3120 with self._indexfp(b'w+') as fp:
3137 fp.seek(startrev * self.index.entry_size)
3121 fp.seek(startrev * self.index.entry_size)
3138 for i, e in enumerate(new_entries):
3122 for i, e in enumerate(new_entries):
3139 rev = startrev + i
3123 rev = startrev + i
3140 self.index.replace_sidedata_info(rev, e[8], e[9], e[0])
3124 self.index.replace_sidedata_info(rev, e[8], e[9], e[0])
3141 packed = self.index.entry_binary(rev)
3125 packed = self.index.entry_binary(rev)
3142 if rev == 0:
3126 if rev == 0:
3143 header = self.index.pack_header(self.version)
3127 header = self.index.pack_header(self.version)
3144 packed = header + packed
3128 packed = header + packed
3145 fp.write(packed)
3129 fp.write(packed)
@@ -1,593 +1,594 b''
1 # upgrade.py - functions for in place upgrade of Mercurial repository
1 # upgrade.py - functions for in place upgrade of Mercurial repository
2 #
2 #
3 # Copyright (c) 2016-present, Gregory Szorc
3 # Copyright (c) 2016-present, Gregory Szorc
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from __future__ import absolute_import
8 from __future__ import absolute_import
9
9
10 import stat
10 import stat
11
11
12 from ..i18n import _
12 from ..i18n import _
13 from ..pycompat import getattr
13 from ..pycompat import getattr
14 from .. import (
14 from .. import (
15 changegroup,
15 changelog,
16 changelog,
16 error,
17 error,
17 filelog,
18 filelog,
18 manifest,
19 manifest,
19 metadata,
20 metadata,
20 pycompat,
21 pycompat,
21 requirements,
22 requirements,
22 revlog,
23 scmutil,
23 scmutil,
24 store,
24 store,
25 util,
25 util,
26 vfs as vfsmod,
26 vfs as vfsmod,
27 )
27 )
28 from ..revlogutils import nodemap
28 from ..revlogutils import (
29 constants as revlogconst,
30 flagutil,
31 nodemap,
32 sidedata as sidedatamod,
33 )
34
35
36 def get_sidedata_helpers(srcrepo, dstrepo):
37 use_w = srcrepo.ui.configbool(b'experimental', b'worker.repository-upgrade')
38 sequential = pycompat.iswindows or not use_w
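# Editor's note: the worker-based adder is skipped on Windows, most likely
# because it relies on POSIX fork semantics (see the multiprocessing use in
# `metadata._get_worker_sidedata_adder`), and when the experimental knob is
# off; the default sequential computer is used instead.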
39 if not sequential:
40 srcrepo.register_sidedata_computer(
41 revlogconst.KIND_CHANGELOG,
42 sidedatamod.SD_FILES,
43 (sidedatamod.SD_FILES,),
44 metadata._get_worker_sidedata_adder(srcrepo, dstrepo),
45 flagutil.REVIDX_HASCOPIESINFO,
46 replace=True,
47 )
48 return changegroup.get_sidedata_helpers(srcrepo, dstrepo._wanted_sidedata)
29
49
30
50
31 def _revlogfrompath(repo, rl_type, path):
51 def _revlogfrompath(repo, rl_type, path):
32 """Obtain a revlog from a repo path.
52 """Obtain a revlog from a repo path.
33
53
34 An instance of the appropriate class is returned.
54 An instance of the appropriate class is returned.
35 """
55 """
36 if rl_type & store.FILEFLAGS_CHANGELOG:
56 if rl_type & store.FILEFLAGS_CHANGELOG:
37 return changelog.changelog(repo.svfs)
57 return changelog.changelog(repo.svfs)
38 elif rl_type & store.FILEFLAGS_MANIFESTLOG:
58 elif rl_type & store.FILEFLAGS_MANIFESTLOG:
39 mandir = b''
59 mandir = b''
40 if b'/' in path:
60 if b'/' in path:
41 mandir = path.rsplit(b'/', 1)[0]
61 mandir = path.rsplit(b'/', 1)[0]
42 return manifest.manifestrevlog(
62 return manifest.manifestrevlog(
43 repo.nodeconstants, repo.svfs, tree=mandir
63 repo.nodeconstants, repo.svfs, tree=mandir
44 )
64 )
45 else:
65 else:
46 # drop the extension and the `data/` prefix
66 # drop the extension and the `data/` prefix
47 path = path.rsplit(b'.', 1)[0].split(b'/', 1)[1]
67 path = path.rsplit(b'.', 1)[0].split(b'/', 1)[1]
48 return filelog.filelog(repo.svfs, path)
68 return filelog.filelog(repo.svfs, path)
49
69
50
70
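# Illustrative sketch (editor's addition, hypothetical path): resolving a
# store entry to the matching revlog object.
#
#     fl = _revlogfrompath(repo, store.FILEFLAGS_FILELOG, b'data/foo.txt.i')
#     # -> a filelog.filelog instance tracking b'foo.txt'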
51 def _copyrevlog(tr, destrepo, oldrl, rl_type, unencodedname):
71 def _copyrevlog(tr, destrepo, oldrl, rl_type, unencodedname):
52 """copy all relevant files for `oldrl` into `destrepo` store
72 """copy all relevant files for `oldrl` into `destrepo` store
53
73
54 Files are copied "as is" without any transformation. The copy is performed
74 Files are copied "as is" without any transformation. The copy is performed
55 without extra checks. Callers are responsible for making sure the copied
75 without extra checks. Callers are responsible for making sure the copied
56 content is compatible with the format of the destination repository.
76 content is compatible with the format of the destination repository.
57 """
77 """
58 oldrl = getattr(oldrl, '_revlog', oldrl)
78 oldrl = getattr(oldrl, '_revlog', oldrl)
59 newrl = _revlogfrompath(destrepo, rl_type, unencodedname)
79 newrl = _revlogfrompath(destrepo, rl_type, unencodedname)
60 newrl = getattr(newrl, '_revlog', newrl)
80 newrl = getattr(newrl, '_revlog', newrl)
61
81
62 oldvfs = oldrl.opener
82 oldvfs = oldrl.opener
63 newvfs = newrl.opener
83 newvfs = newrl.opener
64 oldindex = oldvfs.join(oldrl.indexfile)
84 oldindex = oldvfs.join(oldrl.indexfile)
65 newindex = newvfs.join(newrl.indexfile)
85 newindex = newvfs.join(newrl.indexfile)
66 olddata = oldvfs.join(oldrl.datafile)
86 olddata = oldvfs.join(oldrl.datafile)
67 newdata = newvfs.join(newrl.datafile)
87 newdata = newvfs.join(newrl.datafile)
68
88
69 with newvfs(newrl.indexfile, b'w'):
89 with newvfs(newrl.indexfile, b'w'):
70 pass # create all the directories
90 pass # create all the directories
71
91
72 util.copyfile(oldindex, newindex)
92 util.copyfile(oldindex, newindex)
73 copydata = oldrl.opener.exists(oldrl.datafile)
93 copydata = oldrl.opener.exists(oldrl.datafile)
74 if copydata:
94 if copydata:
75 util.copyfile(olddata, newdata)
95 util.copyfile(olddata, newdata)
76
96
77 if rl_type & store.FILEFLAGS_FILELOG:
97 if rl_type & store.FILEFLAGS_FILELOG:
78 destrepo.svfs.fncache.add(unencodedname)
98 destrepo.svfs.fncache.add(unencodedname)
79 if copydata:
99 if copydata:
80 destrepo.svfs.fncache.add(unencodedname[:-2] + b'.d')
100 destrepo.svfs.fncache.add(unencodedname[:-2] + b'.d')
81
101
82
102
83 UPGRADE_CHANGELOG = b"changelog"
103 UPGRADE_CHANGELOG = b"changelog"
84 UPGRADE_MANIFEST = b"manifest"
104 UPGRADE_MANIFEST = b"manifest"
85 UPGRADE_FILELOGS = b"all-filelogs"
105 UPGRADE_FILELOGS = b"all-filelogs"
86
106
87 UPGRADE_ALL_REVLOGS = frozenset(
107 UPGRADE_ALL_REVLOGS = frozenset(
88 [UPGRADE_CHANGELOG, UPGRADE_MANIFEST, UPGRADE_FILELOGS]
108 [UPGRADE_CHANGELOG, UPGRADE_MANIFEST, UPGRADE_FILELOGS]
89 )
109 )
90
110
91
111
92 def getsidedatacompanion(srcrepo, dstrepo):
93 sidedatacompanion = None
94 removedreqs = srcrepo.requirements - dstrepo.requirements
95 addedreqs = dstrepo.requirements - srcrepo.requirements
96 if requirements.SIDEDATA_REQUIREMENT in removedreqs:
97
98 def sidedatacompanion(rl, rev):
99 rl = getattr(rl, '_revlog', rl)
100 if rl.flags(rev) & revlog.REVIDX_SIDEDATA:
101 return True, (), {}, 0, 0
102 return False, (), {}, 0, 0
103
104 elif requirements.COPIESSDC_REQUIREMENT in addedreqs:
105 sidedatacompanion = metadata.getsidedataadder(srcrepo, dstrepo)
106 elif requirements.COPIESSDC_REQUIREMENT in removedreqs:
107 sidedatacompanion = metadata.getsidedataremover(srcrepo, dstrepo)
108 return sidedatacompanion
109
110
111 def matchrevlog(revlogfilter, rl_type):
112 def matchrevlog(revlogfilter, rl_type):
112 """check if a revlog is selected for cloning.
113 """check if a revlog is selected for cloning.
113
114
114 In other words, does the revlog need any updates, or can it be
115 In other words, does the revlog need any updates, or can it be
115 blindly copied?
116 blindly copied?
116
117
117 The store entry is checked against the passed filter"""
118 The store entry is checked against the passed filter"""
118 if rl_type & store.FILEFLAGS_CHANGELOG:
119 if rl_type & store.FILEFLAGS_CHANGELOG:
119 return UPGRADE_CHANGELOG in revlogfilter
120 return UPGRADE_CHANGELOG in revlogfilter
120 elif rl_type & store.FILEFLAGS_MANIFESTLOG:
121 elif rl_type & store.FILEFLAGS_MANIFESTLOG:
121 return UPGRADE_MANIFEST in revlogfilter
122 return UPGRADE_MANIFEST in revlogfilter
122 assert rl_type & store.FILEFLAGS_FILELOG
123 assert rl_type & store.FILEFLAGS_FILELOG
123 return UPGRADE_FILELOGS in revlogfilter
124 return UPGRADE_FILELOGS in revlogfilter
124
125
125
126
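# Illustrative sketch (editor's addition): selection with different filters.
#
#     matchrevlog(UPGRADE_ALL_REVLOGS, store.FILEFLAGS_CHANGELOG)  # -> True
#     matchrevlog({UPGRADE_FILELOGS}, store.FILEFLAGS_CHANGELOG)   # -> False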
126 def _perform_clone(
127 def _perform_clone(
127 ui,
128 ui,
128 dstrepo,
129 dstrepo,
129 tr,
130 tr,
130 old_revlog,
131 old_revlog,
131 rl_type,
132 rl_type,
132 unencoded,
133 unencoded,
133 upgrade_op,
134 upgrade_op,
134 sidedatacompanion,
135 sidedata_helpers,
135 oncopiedrevision,
136 oncopiedrevision,
136 ):
137 ):
137 """returns the newly created revlog object"""
138 """returns the newly created revlog object"""
138 newrl = None
139 newrl = None
139 if matchrevlog(upgrade_op.revlogs_to_process, rl_type):
140 if matchrevlog(upgrade_op.revlogs_to_process, rl_type):
140 ui.note(
141 ui.note(
141 _(b'cloning %d revisions from %s\n') % (len(old_revlog), unencoded)
142 _(b'cloning %d revisions from %s\n') % (len(old_revlog), unencoded)
142 )
143 )
143 newrl = _revlogfrompath(dstrepo, rl_type, unencoded)
144 newrl = _revlogfrompath(dstrepo, rl_type, unencoded)
144 old_revlog.clone(
145 old_revlog.clone(
145 tr,
146 tr,
146 newrl,
147 newrl,
147 addrevisioncb=oncopiedrevision,
148 addrevisioncb=oncopiedrevision,
148 deltareuse=upgrade_op.delta_reuse_mode,
149 deltareuse=upgrade_op.delta_reuse_mode,
149 forcedeltabothparents=upgrade_op.force_re_delta_both_parents,
150 forcedeltabothparents=upgrade_op.force_re_delta_both_parents,
150 sidedatacompanion=sidedatacompanion,
151 sidedata_helpers=sidedata_helpers,
151 )
152 )
152 else:
153 else:
153 msg = _(b'blindly copying %s containing %i revisions\n')
154 msg = _(b'blindly copying %s containing %i revisions\n')
154 ui.note(msg % (unencoded, len(old_revlog)))
155 ui.note(msg % (unencoded, len(old_revlog)))
155 _copyrevlog(tr, dstrepo, old_revlog, rl_type, unencoded)
156 _copyrevlog(tr, dstrepo, old_revlog, rl_type, unencoded)
156
157
157 newrl = _revlogfrompath(dstrepo, rl_type, unencoded)
158 newrl = _revlogfrompath(dstrepo, rl_type, unencoded)
158 return newrl
159 return newrl
159
160
160
161
161 def _clonerevlogs(
162 def _clonerevlogs(
162 ui,
163 ui,
163 srcrepo,
164 srcrepo,
164 dstrepo,
165 dstrepo,
165 tr,
166 tr,
166 upgrade_op,
167 upgrade_op,
167 ):
168 ):
168 """Copy revlogs between 2 repos."""
169 """Copy revlogs between 2 repos."""
169 revcount = 0
170 revcount = 0
170 srcsize = 0
171 srcsize = 0
171 srcrawsize = 0
172 srcrawsize = 0
172 dstsize = 0
173 dstsize = 0
173 fcount = 0
174 fcount = 0
174 frevcount = 0
175 frevcount = 0
175 fsrcsize = 0
176 fsrcsize = 0
176 frawsize = 0
177 frawsize = 0
177 fdstsize = 0
178 fdstsize = 0
178 mcount = 0
179 mcount = 0
179 mrevcount = 0
180 mrevcount = 0
180 msrcsize = 0
181 msrcsize = 0
181 mrawsize = 0
182 mrawsize = 0
182 mdstsize = 0
183 mdstsize = 0
183 crevcount = 0
184 crevcount = 0
184 csrcsize = 0
185 csrcsize = 0
185 crawsize = 0
186 crawsize = 0
186 cdstsize = 0
187 cdstsize = 0
187
188
188 alldatafiles = list(srcrepo.store.walk())
189 alldatafiles = list(srcrepo.store.walk())
189 # mapping of data files which need to be cloned
190 # mapping of data files which need to be cloned
190 # key is unencoded filename
191 # key is unencoded filename
191 # value is revlog_object_from_srcrepo
192 # value is revlog_object_from_srcrepo
192 manifests = {}
193 manifests = {}
193 changelogs = {}
194 changelogs = {}
194 filelogs = {}
195 filelogs = {}
195
196
196 # Perform a pass to collect metadata. This validates we can open all
197 # Perform a pass to collect metadata. This validates we can open all
197 # source files and allows a unified progress bar to be displayed.
198 # source files and allows a unified progress bar to be displayed.
198 for rl_type, unencoded, encoded, size in alldatafiles:
199 for rl_type, unencoded, encoded, size in alldatafiles:
199 if not rl_type & store.FILEFLAGS_REVLOG_MAIN:
200 if not rl_type & store.FILEFLAGS_REVLOG_MAIN:
200 continue
201 continue
201
202
202 rl = _revlogfrompath(srcrepo, rl_type, unencoded)
203 rl = _revlogfrompath(srcrepo, rl_type, unencoded)
203
204
204 info = rl.storageinfo(
205 info = rl.storageinfo(
205 exclusivefiles=True,
206 exclusivefiles=True,
206 revisionscount=True,
207 revisionscount=True,
207 trackedsize=True,
208 trackedsize=True,
208 storedsize=True,
209 storedsize=True,
209 )
210 )
210
211
211 revcount += info[b'revisionscount'] or 0
212 revcount += info[b'revisionscount'] or 0
212 datasize = info[b'storedsize'] or 0
213 datasize = info[b'storedsize'] or 0
213 rawsize = info[b'trackedsize'] or 0
214 rawsize = info[b'trackedsize'] or 0
214
215
215 srcsize += datasize
216 srcsize += datasize
216 srcrawsize += rawsize
217 srcrawsize += rawsize
217
218
218 # This is for the separate progress bars.
219 # This is for the separate progress bars.
219 if rl_type & store.FILEFLAGS_CHANGELOG:
220 if rl_type & store.FILEFLAGS_CHANGELOG:
220 changelogs[unencoded] = (rl_type, rl)
221 changelogs[unencoded] = (rl_type, rl)
221 crevcount += len(rl)
222 crevcount += len(rl)
222 csrcsize += datasize
223 csrcsize += datasize
223 crawsize += rawsize
224 crawsize += rawsize
224 elif rl_type & store.FILEFLAGS_MANIFESTLOG:
225 elif rl_type & store.FILEFLAGS_MANIFESTLOG:
225 manifests[unencoded] = (rl_type, rl)
226 manifests[unencoded] = (rl_type, rl)
226 mcount += 1
227 mcount += 1
227 mrevcount += len(rl)
228 mrevcount += len(rl)
228 msrcsize += datasize
229 msrcsize += datasize
229 mrawsize += rawsize
230 mrawsize += rawsize
230 elif rl_type & store.FILEFLAGS_FILELOG:
231 elif rl_type & store.FILEFLAGS_FILELOG:
231 filelogs[unencoded] = (rl_type, rl)
232 filelogs[unencoded] = (rl_type, rl)
232 fcount += 1
233 fcount += 1
233 frevcount += len(rl)
234 frevcount += len(rl)
234 fsrcsize += datasize
235 fsrcsize += datasize
235 frawsize += rawsize
236 frawsize += rawsize
236 else:
237 else:
237 raise error.ProgrammingError(b'unknown revlog type')
238 raise error.ProgrammingError(b'unknown revlog type')
238
239
239 if not revcount:
240 if not revcount:
240 return
241 return
241
242
242 ui.status(
243 ui.status(
243 _(
244 _(
244 b'migrating %d total revisions (%d in filelogs, %d in manifests, '
245 b'migrating %d total revisions (%d in filelogs, %d in manifests, '
245 b'%d in changelog)\n'
246 b'%d in changelog)\n'
246 )
247 )
247 % (revcount, frevcount, mrevcount, crevcount)
248 % (revcount, frevcount, mrevcount, crevcount)
248 )
249 )
249 ui.status(
250 ui.status(
250 _(b'migrating %s in store; %s tracked data\n')
251 _(b'migrating %s in store; %s tracked data\n')
251 % ((util.bytecount(srcsize), util.bytecount(srcrawsize)))
252 % ((util.bytecount(srcsize), util.bytecount(srcrawsize)))
252 )
253 )
253
254
254 # Used to keep track of progress.
255 # Used to keep track of progress.
255 progress = None
256 progress = None
256
257
257 def oncopiedrevision(rl, rev, node):
258 def oncopiedrevision(rl, rev, node):
258 progress.increment()
259 progress.increment()
259
260
260 sidedatacompanion = getsidedatacompanion(srcrepo, dstrepo)
261 sidedata_helpers = get_sidedata_helpers(srcrepo, dstrepo)
261
262
262 # Migrating filelogs
263 # Migrating filelogs
263 ui.status(
264 ui.status(
264 _(
265 _(
265 b'migrating %d filelogs containing %d revisions '
266 b'migrating %d filelogs containing %d revisions '
266 b'(%s in store; %s tracked data)\n'
267 b'(%s in store; %s tracked data)\n'
267 )
268 )
268 % (
269 % (
269 fcount,
270 fcount,
270 frevcount,
271 frevcount,
271 util.bytecount(fsrcsize),
272 util.bytecount(fsrcsize),
272 util.bytecount(frawsize),
273 util.bytecount(frawsize),
273 )
274 )
274 )
275 )
275 progress = srcrepo.ui.makeprogress(_(b'file revisions'), total=frevcount)
276 progress = srcrepo.ui.makeprogress(_(b'file revisions'), total=frevcount)
276 for unencoded, (rl_type, oldrl) in sorted(filelogs.items()):
277 for unencoded, (rl_type, oldrl) in sorted(filelogs.items()):
277 newrl = _perform_clone(
278 newrl = _perform_clone(
278 ui,
279 ui,
279 dstrepo,
280 dstrepo,
280 tr,
281 tr,
281 oldrl,
282 oldrl,
282 rl_type,
283 rl_type,
283 unencoded,
284 unencoded,
284 upgrade_op,
285 upgrade_op,
285 sidedatacompanion,
286 sidedata_helpers,
286 oncopiedrevision,
287 oncopiedrevision,
287 )
288 )
288 info = newrl.storageinfo(storedsize=True)
289 info = newrl.storageinfo(storedsize=True)
289 fdstsize += info[b'storedsize'] or 0
290 fdstsize += info[b'storedsize'] or 0
290 ui.status(
291 ui.status(
291 _(
292 _(
292 b'finished migrating %d filelog revisions across %d '
293 b'finished migrating %d filelog revisions across %d '
293 b'filelogs; change in size: %s\n'
294 b'filelogs; change in size: %s\n'
294 )
295 )
295 % (frevcount, fcount, util.bytecount(fdstsize - fsrcsize))
296 % (frevcount, fcount, util.bytecount(fdstsize - fsrcsize))
296 )
297 )
297
298
298 # Migrating manifests
299 # Migrating manifests
299 ui.status(
300 ui.status(
300 _(
301 _(
301 b'migrating %d manifests containing %d revisions '
302 b'migrating %d manifests containing %d revisions '
302 b'(%s in store; %s tracked data)\n'
303 b'(%s in store; %s tracked data)\n'
303 )
304 )
304 % (
305 % (
305 mcount,
306 mcount,
306 mrevcount,
307 mrevcount,
307 util.bytecount(msrcsize),
308 util.bytecount(msrcsize),
308 util.bytecount(mrawsize),
309 util.bytecount(mrawsize),
309 )
310 )
310 )
311 )
311 if progress:
312 if progress:
312 progress.complete()
313 progress.complete()
313 progress = srcrepo.ui.makeprogress(
314 progress = srcrepo.ui.makeprogress(
314 _(b'manifest revisions'), total=mrevcount
315 _(b'manifest revisions'), total=mrevcount
315 )
316 )
316 for unencoded, (rl_type, oldrl) in sorted(manifests.items()):
317 for unencoded, (rl_type, oldrl) in sorted(manifests.items()):
317 newrl = _perform_clone(
318 newrl = _perform_clone(
318 ui,
319 ui,
319 dstrepo,
320 dstrepo,
320 tr,
321 tr,
321 oldrl,
322 oldrl,
322 rl_type,
323 rl_type,
323 unencoded,
324 unencoded,
324 upgrade_op,
325 upgrade_op,
325 sidedatacompanion,
326 sidedata_helpers,
326 oncopiedrevision,
327 oncopiedrevision,
327 )
328 )
328 info = newrl.storageinfo(storedsize=True)
329 info = newrl.storageinfo(storedsize=True)
329 mdstsize += info[b'storedsize'] or 0
330 mdstsize += info[b'storedsize'] or 0
330 ui.status(
331 ui.status(
331 _(
332 _(
332 b'finished migrating %d manifest revisions across %d '
333 b'finished migrating %d manifest revisions across %d '
333 b'manifests; change in size: %s\n'
334 b'manifests; change in size: %s\n'
334 )
335 )
335 % (mrevcount, mcount, util.bytecount(mdstsize - msrcsize))
336 % (mrevcount, mcount, util.bytecount(mdstsize - msrcsize))
336 )
337 )
337
338
338 # Migrating changelog
339 # Migrating changelog
339 ui.status(
340 ui.status(
340 _(
341 _(
341 b'migrating changelog containing %d revisions '
342 b'migrating changelog containing %d revisions '
342 b'(%s in store; %s tracked data)\n'
343 b'(%s in store; %s tracked data)\n'
343 )
344 )
344 % (
345 % (
345 crevcount,
346 crevcount,
346 util.bytecount(csrcsize),
347 util.bytecount(csrcsize),
347 util.bytecount(crawsize),
348 util.bytecount(crawsize),
348 )
349 )
349 )
350 )
350 if progress:
351 if progress:
351 progress.complete()
352 progress.complete()
352 progress = srcrepo.ui.makeprogress(
353 progress = srcrepo.ui.makeprogress(
353 _(b'changelog revisions'), total=crevcount
354 _(b'changelog revisions'), total=crevcount
354 )
355 )
355 for unencoded, (rl_type, oldrl) in sorted(changelogs.items()):
356 for unencoded, (rl_type, oldrl) in sorted(changelogs.items()):
356 newrl = _perform_clone(
357 newrl = _perform_clone(
357 ui,
358 ui,
358 dstrepo,
359 dstrepo,
359 tr,
360 tr,
360 oldrl,
361 oldrl,
361 rl_type,
362 rl_type,
362 unencoded,
363 unencoded,
363 upgrade_op,
364 upgrade_op,
364 sidedatacompanion,
365 sidedata_helpers,
365 oncopiedrevision,
366 oncopiedrevision,
366 )
367 )
367 info = newrl.storageinfo(storedsize=True)
368 info = newrl.storageinfo(storedsize=True)
368 cdstsize += info[b'storedsize'] or 0
369 cdstsize += info[b'storedsize'] or 0
369 progress.complete()
370 progress.complete()
370 ui.status(
371 ui.status(
371 _(
372 _(
372 b'finished migrating %d changelog revisions; change in size: '
373 b'finished migrating %d changelog revisions; change in size: '
373 b'%s\n'
374 b'%s\n'
374 )
375 )
375 % (crevcount, util.bytecount(cdstsize - csrcsize))
376 % (crevcount, util.bytecount(cdstsize - csrcsize))
376 )
377 )
377
378
378 dstsize = fdstsize + mdstsize + cdstsize
379 dstsize = fdstsize + mdstsize + cdstsize
379 ui.status(
380 ui.status(
380 _(
381 _(
381 b'finished migrating %d total revisions; total change in store '
382 b'finished migrating %d total revisions; total change in store '
382 b'size: %s\n'
383 b'size: %s\n'
383 )
384 )
384 % (revcount, util.bytecount(dstsize - srcsize))
385 % (revcount, util.bytecount(dstsize - srcsize))
385 )
386 )
386
387
387
388
388 def _files_to_copy_post_revlog_clone(srcrepo):
389 def _files_to_copy_post_revlog_clone(srcrepo):
389 """yields files which should be copied to destination after revlogs
390 """yields files which should be copied to destination after revlogs
390 """yields files which should be copied to the destination after revlogs
391 """yields files which should be copied to the destination after revlogs
391 for path, kind, st in sorted(srcrepo.store.vfs.readdir(b'', stat=True)):
392 for path, kind, st in sorted(srcrepo.store.vfs.readdir(b'', stat=True)):
392 # don't copy revlogs as they are already cloned
393 # don't copy revlogs as they are already cloned
393 if store.revlog_type(path) is not None:
394 if store.revlog_type(path) is not None:
394 continue
395 continue
395 # Skip transaction related files.
396 # Skip transaction related files.
396 if path.startswith(b'undo'):
397 if path.startswith(b'undo'):
397 continue
398 continue
398 # Only copy regular files.
399 # Only copy regular files.
399 if kind != stat.S_IFREG:
400 if kind != stat.S_IFREG:
400 continue
401 continue
401 # Skip the lock file and fncache, which are managed separately.
402 # Skip the lock file and fncache, which are managed separately.
402 if path in (b'lock', b'fncache'):
403 if path in (b'lock', b'fncache'):
403 continue
404 continue
404 # TODO: should we skip cache too?
405 # TODO: should we skip cache too?
405
406
406 yield path
407 yield path
407
408
408
409
409 def _replacestores(currentrepo, upgradedrepo, backupvfs, upgrade_op):
410 def _replacestores(currentrepo, upgradedrepo, backupvfs, upgrade_op):
410 """Replace the stores after current repository is upgraded
411 """Replace the stores after current repository is upgraded
411
412
412 Creates a backup of the current repository store at the backup path
413 Creates a backup of the current repository store at the backup path
413 Replaces the store files in the current repo with the upgraded ones
414 Replaces the store files in the current repo with the upgraded ones
414
415
415 Arguments:
416 Arguments:
416 currentrepo: repo object of current repository
417 currentrepo: repo object of current repository
417 upgradedrepo: repo object of the upgraded data
418 upgradedrepo: repo object of the upgraded data
418 backupvfs: vfs object for the backup path
419 backupvfs: vfs object for the backup path
419 upgrade_op: upgrade operation object
420 upgrade_op: upgrade operation object
420 to be used to decide what all is upgraded
421 to be used to decide what all is upgraded
421 """
422 """
422 # TODO: don't blindly rename everything in store
423 # TODO: don't blindly rename everything in store
423 # There can be upgrades where store is not touched at all
424 # There can be upgrades where store is not touched at all
424 if upgrade_op.backup_store:
425 if upgrade_op.backup_store:
425 util.rename(currentrepo.spath, backupvfs.join(b'store'))
426 util.rename(currentrepo.spath, backupvfs.join(b'store'))
426 else:
427 else:
427 currentrepo.vfs.rmtree(b'store', forcibly=True)
428 currentrepo.vfs.rmtree(b'store', forcibly=True)
428 util.rename(upgradedrepo.spath, currentrepo.spath)
429 util.rename(upgradedrepo.spath, currentrepo.spath)
429
430
430
431
431 def finishdatamigration(ui, srcrepo, dstrepo, requirements):
432 def finishdatamigration(ui, srcrepo, dstrepo, requirements):
432 """Hook point for extensions to perform additional actions during upgrade.
433 """Hook point for extensions to perform additional actions during upgrade.
433
434
434 This function is called after revlogs and store files have been copied but
435 This function is called after revlogs and store files have been copied but
435 before the new store is swapped into the original location.
436 before the new store is swapped into the original location.
436 """
437 """
437
438
438
439
439 def upgrade(ui, srcrepo, dstrepo, upgrade_op):
440 def upgrade(ui, srcrepo, dstrepo, upgrade_op):
440 """Do the low-level work of upgrading a repository.
441 """Do the low-level work of upgrading a repository.
441
442
442 The upgrade is effectively performed as a copy between a source
443 The upgrade is effectively performed as a copy between a source
443 repository and a temporary destination repository.
444 repository and a temporary destination repository.
444
445
445 The source repository is unmodified for as long as possible so the
446 The source repository is unmodified for as long as possible so the
446 upgrade can abort at any time without causing loss of service for
447 upgrade can abort at any time without causing loss of service for
447 readers and without corrupting the source repository.
448 readers and without corrupting the source repository.
448 """
449 """
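# Editor's summary of the branches below: requirements-only upgrades just
# rewrite the requires file; a pure persistent-nodemap (de)activation writes
# or deletes the nodemap files in place; everything else clones the revlogs
# into dstrepo, copies the remaining store files, then swaps the stores.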
449 assert srcrepo.currentwlock()
450 assert srcrepo.currentwlock()
450 assert dstrepo.currentwlock()
451 assert dstrepo.currentwlock()
451 backuppath = None
452 backuppath = None
452 backupvfs = None
453 backupvfs = None
453
454
454 ui.status(
455 ui.status(
455 _(
456 _(
456 b'(it is safe to interrupt this process any time before '
457 b'(it is safe to interrupt this process any time before '
457 b'data migration completes)\n'
458 b'data migration completes)\n'
458 )
459 )
459 )
460 )
460
461
461 if upgrade_op.requirements_only:
462 if upgrade_op.requirements_only:
462 ui.status(_(b'upgrading repository requirements\n'))
463 ui.status(_(b'upgrading repository requirements\n'))
463 scmutil.writereporequirements(srcrepo, upgrade_op.new_requirements)
464 scmutil.writereporequirements(srcrepo, upgrade_op.new_requirements)
464 # if there is only one action and that is persistent nodemap upgrade
465 # if there is only one action and that is persistent nodemap upgrade
465 # directly write the nodemap file and update requirements instead of going
466 # directly write the nodemap file and update requirements instead of going
466 # through the whole cloning process
467 # through the whole cloning process
467 elif (
468 elif (
468 len(upgrade_op.upgrade_actions) == 1
469 len(upgrade_op.upgrade_actions) == 1
469 and b'persistent-nodemap' in upgrade_op._upgrade_actions_names
470 and b'persistent-nodemap' in upgrade_op._upgrade_actions_names
470 and not upgrade_op.removed_actions
471 and not upgrade_op.removed_actions
471 ):
472 ):
472 ui.status(
473 ui.status(
473 _(b'upgrading repository to use persistent nodemap feature\n')
474 _(b'upgrading repository to use persistent nodemap feature\n')
474 )
475 )
475 with srcrepo.transaction(b'upgrade') as tr:
476 with srcrepo.transaction(b'upgrade') as tr:
476 unfi = srcrepo.unfiltered()
477 unfi = srcrepo.unfiltered()
477 cl = unfi.changelog
478 cl = unfi.changelog
478 nodemap.persist_nodemap(tr, cl, force=True)
479 nodemap.persist_nodemap(tr, cl, force=True)
479 # we want to directly operate on the underlying revlog to force
480 # we want to directly operate on the underlying revlog to force
480 # create a nodemap file. This is fine since this is upgrade code
481 # create a nodemap file. This is fine since this is upgrade code
481 # and it heavily relies on repository being revlog based
482 # and it heavily relies on repository being revlog based
482 # hence accessing private attributes can be justified
483 # hence accessing private attributes can be justified
483 nodemap.persist_nodemap(
484 nodemap.persist_nodemap(
484 tr, unfi.manifestlog._rootstore._revlog, force=True
                tr, unfi.manifestlog._rootstore._revlog, force=True
            )
            scmutil.writereporequirements(srcrepo, upgrade_op.new_requirements)
    elif (
        len(upgrade_op.removed_actions) == 1
        and [
            x
            for x in upgrade_op.removed_actions
            if x.name == b'persistent-nodemap'
        ]
        and not upgrade_op.upgrade_actions
    ):
        ui.status(
            _(b'downgrading repository to not use persistent nodemap feature\n')
        )
        with srcrepo.transaction(b'upgrade') as tr:
            unfi = srcrepo.unfiltered()
            cl = unfi.changelog
            nodemap.delete_nodemap(tr, srcrepo, cl)
            # check comment 20 lines above for accessing private attributes
            nodemap.delete_nodemap(
                tr, srcrepo, unfi.manifestlog._rootstore._revlog
            )
            scmutil.writereporequirements(srcrepo, upgrade_op.new_requirements)
    else:
        with dstrepo.transaction(b'upgrade') as tr:
            _clonerevlogs(
                ui,
                srcrepo,
                dstrepo,
                tr,
                upgrade_op,
            )

        # Now copy other files in the store directory.
        for p in _files_to_copy_post_revlog_clone(srcrepo):
            srcrepo.ui.status(_(b'copying %s\n') % p)
            src = srcrepo.store.rawvfs.join(p)
            dst = dstrepo.store.rawvfs.join(p)
            util.copyfile(src, dst, copystat=True)

        finishdatamigration(ui, srcrepo, dstrepo, requirements)

        ui.status(_(b'data fully upgraded in a temporary repository\n'))

        if upgrade_op.backup_store:
            backuppath = pycompat.mkdtemp(
                prefix=b'upgradebackup.', dir=srcrepo.path
            )
            backupvfs = vfsmod.vfs(backuppath)

            # Make a backup of requires file first, as it is the first to be modified.
            util.copyfile(
                srcrepo.vfs.join(b'requires'), backupvfs.join(b'requires')
            )

        # We install an arbitrary requirement that clients must not support
        # as a mechanism to lock out new clients during the data swap. This is
        # better than allowing a client to continue while the repository is in
        # an inconsistent state.
        ui.status(
            _(
                b'marking source repository as being upgraded; clients will be '
                b'unable to read from repository\n'
            )
        )
        scmutil.writereporequirements(
            srcrepo, srcrepo.requirements | {b'upgradeinprogress'}
        )

        ui.status(_(b'starting in-place swap of repository data\n'))
        if upgrade_op.backup_store:
            ui.status(
                _(b'replaced files will be backed up at %s\n') % backuppath
            )

        # Now swap in the new store directory. Doing it as a rename should make
        # the operation nearly instantaneous and atomic (at least in well-behaved
        # environments).
        ui.status(_(b'replacing store...\n'))
        tstart = util.timer()
        _replacestores(srcrepo, dstrepo, backupvfs, upgrade_op)
        elapsed = util.timer() - tstart
        ui.status(
            _(
                b'store replacement complete; repository was inconsistent for '
                b'%0.1fs\n'
            )
            % elapsed
        )

        # We first write the requirements file. Any new requirements will lock
        # out legacy clients.
        ui.status(
            _(
                b'finalizing requirements file and making repository readable '
                b'again\n'
            )
        )
        scmutil.writereporequirements(srcrepo, upgrade_op.new_requirements)

        if upgrade_op.backup_store:
            # The lock file from the old store won't be removed because nothing has a
            # reference to its new location. So clean it up manually. Alternatively, we
            # could update srcrepo.svfs and other variables to point to the new
            # location. This is simpler.
            assert backupvfs is not None  # help pytype
            backupvfs.unlink(b'store/lock')

    return backuppath
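The code above is the heart of the in-place upgrade: back up 'requires', poison it with a requirement no client supports so every reader refuses to open the repository, swap the store by rename, then write the final requirements to make the repository readable again. A minimal sketch of that lock-out pattern outside Mercurial's internals (paths, helper name, and requirement spelling here are illustrative, not hg's API):

import os
import shutil
import tempfile


def locked_store_swap(repo_path, new_store_path, final_requirements):
    """Swap in an upgraded store while clients stay locked out."""
    requires = os.path.join(repo_path, 'requires')
    store = os.path.join(repo_path, 'store')
    backupdir = tempfile.mkdtemp(prefix='upgradebackup.', dir=repo_path)
    # Back up 'requires' first; it is the first file we mutate.
    shutil.copy2(requires, os.path.join(backupdir, 'requires'))
    # An unknown requirement makes every client refuse to open the repo,
    # which beats letting one read a half-swapped store.
    with open(requires, 'a') as f:
        f.write('upgradeinprogress\n')
    os.rename(store, os.path.join(backupdir, 'store'))  # near-atomic rename
    os.rename(new_store_path, store)
    # Written last: the repository only becomes readable again once the
    # new store is fully in place.
    with open(requires, 'w') as f:
        f.writelines(sorted(req + '\n' for req in final_requirements))
    return backupdir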
@@ -1,507 +1,503 @@
#testcases extra sidedata

#if extra
  $ cat >> $HGRCPATH << EOF
  > [experimental]
  > copies.write-to=changeset-only
  > copies.read-from=changeset-only
  > [alias]
  > changesetcopies = log -r . -T 'files: {files}
  > {extras % "{ifcontains("files", key, "{key}: {value}\n")}"}
  > {extras % "{ifcontains("copies", key, "{key}: {value}\n")}"}'
  > EOF
#endif

#if sidedata
  $ cat >> $HGRCPATH << EOF
  > [format]
  > exp-use-copies-side-data-changeset = yes
  > EOF
#endif

  $ cat >> $HGRCPATH << EOF
  > [alias]
  > showcopies = log -r . -T '{file_copies % "{source} -> {name}\n"}'
  > [extensions]
  > rebase =
  > split =
  > EOF

Check that copies are recorded correctly

  $ hg init repo
  $ cd repo
#if sidedata
  $ hg debugformat -v
  format-variant repo config default
  fncache: yes yes yes
  dotencode: yes yes yes
  generaldelta: yes yes yes
  share-safe: no no no
  sparserevlog: yes yes yes
  persistent-nodemap: no no no (no-rust !)
  persistent-nodemap: yes yes no (rust !)
  copies-sdc: yes yes no
  revlog-v2: yes yes no
  plain-cl-delta: yes yes yes
  compression: zlib zlib zlib (no-zstd !)
  compression: zstd zstd zstd (zstd !)
  compression-level: default default default
#else
  $ hg debugformat -v
  format-variant repo config default
  fncache: yes yes yes
  dotencode: yes yes yes
  generaldelta: yes yes yes
  share-safe: no no no
  sparserevlog: yes yes yes
  persistent-nodemap: no no no (no-rust !)
  persistent-nodemap: yes yes no (rust !)
  copies-sdc: no no no
  revlog-v2: no no no
  plain-cl-delta: yes yes yes
  compression: zlib zlib zlib (no-zstd !)
  compression: zstd zstd zstd (zstd !)
  compression-level: default default default
#endif
  $ echo a > a
  $ hg add a
  $ hg ci -m initial
  $ hg cp a b
  $ hg cp a c
  $ hg cp a d
  $ hg ci -m 'copy a to b, c, and d'

#if extra

  $ hg changesetcopies
  files: b c d
  filesadded: 0
  1
  2

  p1copies: 0\x00a (esc)
  1\x00a (esc)
  2\x00a (esc)
#else
  $ hg debugsidedata -c -v -- -1
  1 sidedata entries
  entry-0014 size 44
  '\x00\x00\x00\x04\x00\x00\x00\x00\x01\x00\x00\x00\x00\x06\x00\x00\x00\x02\x00\x00\x00\x00\x06\x00\x00\x00\x03\x00\x00\x00\x00\x06\x00\x00\x00\x04\x00\x00\x00\x00abcd'
#endif

  $ hg showcopies
  a -> b
  a -> c
  a -> d

#if extra

  $ hg showcopies --config experimental.copies.read-from=compatibility
  a -> b
  a -> c
  a -> d
  $ hg showcopies --config experimental.copies.read-from=filelog-only

#endif

Check that renames are recorded correctly

  $ hg mv b b2
  $ hg ci -m 'rename b to b2'

#if extra

  $ hg changesetcopies
  files: b b2
  filesadded: 1
  filesremoved: 0

  p1copies: 1\x00b (esc)

#else
  $ hg debugsidedata -c -v -- -1
  1 sidedata entries
  entry-0014 size 25
  '\x00\x00\x00\x02\x0c\x00\x00\x00\x01\x00\x00\x00\x00\x06\x00\x00\x00\x03\x00\x00\x00\x00bb2'
#endif

  $ hg showcopies
  b -> b2


Rename onto existing file. This should get recorded in the changeset files list and in the extras,
even though there is no filelog entry.

  $ hg cp b2 c --force
  $ hg st --copies
  M c
    b2

#if extra

  $ hg debugindex c
  rev linkrev nodeid p1 p2
  0 1 b789fdd96dc2 000000000000 000000000000

#else

  $ hg debugindex c
  rev linkrev nodeid p1 p2
  0 1 37d9b5d994ea 000000000000 000000000000

#endif


  $ hg ci -m 'move b onto d'

#if extra

  $ hg changesetcopies
  files: c

  p1copies: 0\x00b2 (esc)

#else
  $ hg debugsidedata -c -v -- -1
  1 sidedata entries
  entry-0014 size 25
  '\x00\x00\x00\x02\x00\x00\x00\x00\x02\x00\x00\x00\x00\x16\x00\x00\x00\x03\x00\x00\x00\x00b2c'
#endif

  $ hg showcopies
  b2 -> c

#if extra

  $ hg debugindex c
  rev linkrev nodeid p1 p2
  0 1 b789fdd96dc2 000000000000 000000000000

#else

  $ hg debugindex c
  rev linkrev nodeid p1 p2
  0 1 37d9b5d994ea 000000000000 000000000000
  1 3 029625640347 000000000000 000000000000

#endif

Create a merge commit with copying done during merge.

  $ hg co 0
  0 files updated, 0 files merged, 3 files removed, 0 files unresolved
  $ hg cp a e
  $ hg cp a f
  $ hg ci -m 'copy a to e and f'
  created new head
  $ hg merge 3
  3 files updated, 0 files merged, 0 files removed, 0 files unresolved
  (branch merge, don't forget to commit)
File 'a' exists on both sides, so 'g' could be recorded as being from p1 or p2, but we currently
always record it as being from p1
  $ hg cp a g
File 'd' exists only in p2, so 'h' should be from p2
  $ hg cp d h
File 'f' exists only in p1, so 'i' should be from p1
  $ hg cp f i
  $ hg ci -m 'merge'

#if extra

  $ hg changesetcopies
  files: g h i
  filesadded: 0
  1
  2

  p1copies: 0\x00a (esc)
  2\x00f (esc)
  p2copies: 1\x00d (esc)

#else
  $ hg debugsidedata -c -v -- -1
  1 sidedata entries
  entry-0014 size 64
  '\x00\x00\x00\x06\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x06\x00\x00\x00\x04\x00\x00\x00\x00\x07\x00\x00\x00\x05\x00\x00\x00\x01\x06\x00\x00\x00\x06\x00\x00\x00\x02adfghi'
#endif

  $ hg showcopies
  a -> g
  d -> h
  f -> i

Test writing to both changeset and filelog

  $ hg cp a j
#if extra
  $ hg ci -m 'copy a to j' --config experimental.copies.write-to=compatibility
  $ hg changesetcopies
  files: j
  filesadded: 0
  filesremoved:

  p1copies: 0\x00a (esc)
  p2copies:
#else
  $ hg ci -m 'copy a to j'
  $ hg debugsidedata -c -v -- -1
  1 sidedata entries
  entry-0014 size 24
  '\x00\x00\x00\x02\x00\x00\x00\x00\x01\x00\x00\x00\x00\x06\x00\x00\x00\x02\x00\x00\x00\x00aj'
#endif
  $ hg debugdata j 0
  \x01 (esc)
  copy: a
  copyrev: b789fdd96dc2f3bd229c1dd8eedf0fc60e2b68e3
  \x01 (esc)
  a
  $ hg showcopies
  a -> j
  $ hg showcopies --config experimental.copies.read-from=compatibility
  a -> j
  $ hg showcopies --config experimental.copies.read-from=filelog-only
  a -> j
Existing copy information in the changeset gets removed on amend and writing
copy information on to the filelog
#if extra
  $ hg ci --amend -m 'copy a to j, v2' \
  > --config experimental.copies.write-to=filelog-only
  saved backup bundle to $TESTTMP/repo/.hg/strip-backup/*-*-amend.hg (glob)
  $ hg changesetcopies
  files: j

#else
  $ hg ci --amend -m 'copy a to j, v2'
  saved backup bundle to $TESTTMP/repo/.hg/strip-backup/*-*-amend.hg (glob)
  $ hg debugsidedata -c -v -- -1
  1 sidedata entries
  entry-0014 size 24
  '\x00\x00\x00\x02\x00\x00\x00\x00\x01\x00\x00\x00\x00\x06\x00\x00\x00\x02\x00\x00\x00\x00aj'
#endif
  $ hg showcopies --config experimental.copies.read-from=filelog-only
  a -> j
The entries should be written to extras even if they're empty (so the client
won't have to fall back to reading from filelogs)
  $ echo x >> j
#if extra
  $ hg ci -m 'modify j' --config experimental.copies.write-to=compatibility
  $ hg changesetcopies
  files: j
  filesadded:
  filesremoved:

  p1copies:
  p2copies:
#else
  $ hg ci -m 'modify j'
  $ hg debugsidedata -c -v -- -1
  1 sidedata entries
  entry-0014 size 14
  '\x00\x00\x00\x01\x14\x00\x00\x00\x01\x00\x00\x00\x00j'
#endif

Test writing only to filelog

  $ hg cp a k
#if extra
  $ hg ci -m 'copy a to k' --config experimental.copies.write-to=filelog-only

  $ hg changesetcopies
  files: k

#else
  $ hg ci -m 'copy a to k'
  $ hg debugsidedata -c -v -- -1
  1 sidedata entries
  entry-0014 size 24
  '\x00\x00\x00\x02\x00\x00\x00\x00\x01\x00\x00\x00\x00\x06\x00\x00\x00\x02\x00\x00\x00\x00ak'
#endif

  $ hg debugdata k 0
  \x01 (esc)
  copy: a
  copyrev: b789fdd96dc2f3bd229c1dd8eedf0fc60e2b68e3
  \x01 (esc)
  a
#if extra
  $ hg showcopies

  $ hg showcopies --config experimental.copies.read-from=compatibility
  a -> k
  $ hg showcopies --config experimental.copies.read-from=filelog-only
  a -> k
#else
  $ hg showcopies
  a -> k
#endif

  $ cd ..
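For reference, the filesadded/p1copies extras printed by the changesetcopies alias above are easy to read back by hand: entries are newline-separated, each copy entry is an index into the changeset's sorted "files" list, a NUL byte, then the copy source, and filesadded/filesremoved carry bare indices into the same list. A small reading aid (a sketch, not Mercurial's own parser):

def decode_copies_extra(files, extra_value):
    """Map destination file -> copy source from a p1copies/p2copies extra."""
    copies = {}
    for entry in extra_value.split(b'\n'):
        if entry:
            index, source = entry.split(b'\x00')
            copies[files[int(index)]] = source
    return copies


files = [b'b', b'c', b'd']  # the changeset's sorted "files" list
p1copies = b'0\x00a\n1\x00a\n2\x00a'
print(decode_copies_extra(files, p1copies))
# {b'b': b'a', b'c': b'a', b'd': b'a'}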

Test rebasing a commit with copy information

  $ hg init rebase-rename
  $ cd rebase-rename
  $ echo a > a
  $ hg ci -Aqm 'add a'
  $ echo a2 > a
  $ hg ci -m 'modify a'
  $ hg co -q 0
  $ hg mv a b
  $ hg ci -qm 'rename a to b'
Not only do we want this to run in-memory, it shouldn't fall back to
on-disk merge (no conflicts), so we force it to be in-memory
with no fallback.
  $ hg rebase -d 1 --config rebase.experimental.inmemory=yes --config devel.rebase.force-in-memory-merge=yes
  rebasing 2:* tip "rename a to b" (glob)
  merging a and b to b
  saved backup bundle to $TESTTMP/rebase-rename/.hg/strip-backup/*-*-rebase.hg (glob)
  $ hg st --change . --copies
  A b
    a
  R a
  $ cd ..

Test splitting a commit

  $ hg init split
  $ cd split
  $ echo a > a
  $ echo b > b
  $ hg ci -Aqm 'add a and b'
  $ echo a2 > a
  $ hg mv b c
  $ hg ci -m 'modify a, move b to c'
  $ hg --config ui.interactive=yes split <<EOF
  > y
  > y
  > n
  > y
  > EOF
  diff --git a/a b/a
  1 hunks, 1 lines changed
  examine changes to 'a'?
  (enter ? for help) [Ynesfdaq?] y

  @@ -1,1 +1,1 @@
  -a
  +a2
  record this change to 'a'?
  (enter ? for help) [Ynesfdaq?] y

  diff --git a/b b/c
  rename from b
  rename to c
  examine changes to 'b' and 'c'?
  (enter ? for help) [Ynesfdaq?] n

  created new head
  diff --git a/b b/c
  rename from b
  rename to c
  examine changes to 'b' and 'c'?
  (enter ? for help) [Ynesfdaq?] y

  saved backup bundle to $TESTTMP/split/.hg/strip-backup/*-*-split.hg (glob)
  $ cd ..

Test committing half a rename

  $ hg init partial
  $ cd partial
  $ echo a > a
  $ hg ci -Aqm 'add a'
  $ hg mv a b
  $ hg ci -m 'remove a' a

#if sidedata

Test upgrading/downgrading to sidedata storage
==============================================

-downgrading (keeping some sidedata)
+downgrading

  $ hg debugformat -v
  format-variant repo config default
  fncache: yes yes yes
  dotencode: yes yes yes
  generaldelta: yes yes yes
  share-safe: no no no
  sparserevlog: yes yes yes
  persistent-nodemap: no no no (no-rust !)
  persistent-nodemap: yes yes no (rust !)
  copies-sdc: yes yes no
  revlog-v2: yes yes no
  plain-cl-delta: yes yes yes
  compression: zlib zlib zlib (no-zstd !)
  compression: zstd zstd zstd (zstd !)
  compression-level: default default default
  $ hg debugsidedata -c -- 0
  1 sidedata entries
  entry-0014 size 14
  $ hg debugsidedata -c -- 1
  1 sidedata entries
  entry-0014 size 14
  $ hg debugsidedata -m -- 0
  $ cat << EOF > .hg/hgrc
  > [format]
  > exp-use-side-data = yes
  > exp-use-copies-side-data-changeset = no
  > EOF
  $ hg debugupgraderepo --run --quiet --no-backup > /dev/null
  $ hg debugformat -v
  format-variant repo config default
  fncache: yes yes yes
  dotencode: yes yes yes
  generaldelta: yes yes yes
  share-safe: no no no
  sparserevlog: yes yes yes
  persistent-nodemap: no no no (no-rust !)
  persistent-nodemap: yes yes no (rust !)
  copies-sdc: no no no
  revlog-v2: yes yes no
  plain-cl-delta: yes yes yes
  compression: zlib zlib zlib (no-zstd !)
  compression: zstd zstd zstd (zstd !)
  compression-level: default default default
  $ hg debugsidedata -c -- 0
-  1 sidedata entries
-  entry-0014 size 14
  $ hg debugsidedata -c -- 1
-  1 sidedata entries
-  entry-0014 size 14
  $ hg debugsidedata -m -- 0

upgrading

  $ cat << EOF > .hg/hgrc
  > [format]
  > exp-use-copies-side-data-changeset = yes
  > EOF
  $ hg debugupgraderepo --run --quiet --no-backup > /dev/null
  $ hg debugformat -v
  format-variant repo config default
  fncache: yes yes yes
  dotencode: yes yes yes
  generaldelta: yes yes yes
  share-safe: no no no
  sparserevlog: yes yes yes
  persistent-nodemap: no no no (no-rust !)
  persistent-nodemap: yes yes no (rust !)
  copies-sdc: yes yes no
  revlog-v2: yes yes no
  plain-cl-delta: yes yes yes
  compression: zlib zlib zlib (no-zstd !)
  compression: zstd zstd zstd (zstd !)
  compression-level: default default default
  $ hg debugsidedata -c -- 0
  1 sidedata entries
  entry-0014 size 14
  $ hg debugsidedata -c -- 1
  1 sidedata entries
  entry-0014 size 14
  $ hg debugsidedata -m -- 0

#endif

  $ cd ..
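The entry-0014 blobs dumped by hg debugsidedata above follow a layout that can be inferred from their sizes: a 4-byte big-endian file count, one 9-byte record per file (a flag byte, the end offset of the file's name in the trailing name blob, and the index of its copy source), then the concatenated names; 4 + 9*4 + len('abcd') = 44 matches the first dump. The sketch below decodes the 'rename b to b2' blob under that inferred layout; the authoritative encoder lives in Mercurial's metadata module, and the flag-bit meanings are left as raw hex here:

import struct


def decode_files_sidedata(raw):
    """Decode an entry-0014 blob under the inferred '>I' + n*'>BII' layout."""
    (count,) = struct.unpack_from('>I', raw, 0)
    records = [
        struct.unpack_from('>BII', raw, 4 + 9 * i) for i in range(count)
    ]
    names_blob = raw[4 + 9 * count:]
    files, start = [], 0
    for flags, name_end, copy_idx in records:
        files.append((names_blob[start:name_end], flags, copy_idx))
        start = name_end
    return files


# The 'rename b to b2' blob from the test above:
blob = (b'\x00\x00\x00\x02'
        b'\x0c\x00\x00\x00\x01\x00\x00\x00\x00'
        b'\x06\x00\x00\x00\x03\x00\x00\x00\x00'
        b'bb2')
for name, flags, copy_idx in decode_files_sidedata(blob):
    print(name, hex(flags), copy_idx)  # (b'b', 0xc, 0) then (b'b2', 0x6, 0)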
@@ -1,93 +1,106 @@
# ext-sidedata.py - small extension to test the sidedata logic
#
# Copyright 2019 Pierre-Yves David <pierre-yves.david@octobus.net>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

from __future__ import absolute_import

import hashlib
import struct

from mercurial.node import nullrev
from mercurial import (
+    changegroup,
    extensions,
    requirements,
    revlog,
)

from mercurial.upgrade_utils import engine as upgrade_engine

+from mercurial.revlogutils import constants
from mercurial.revlogutils import sidedata


def wrapaddrevision(
    orig, self, text, transaction, link, p1, p2, *args, **kwargs
):
    if kwargs.get('sidedata') is None:
        kwargs['sidedata'] = {}
    sd = kwargs['sidedata']
    ## let's store some arbitrary data just for testing
    # text length
    sd[sidedata.SD_TEST1] = struct.pack('>I', len(text))
    # and sha2 hashes
    sha256 = hashlib.sha256(text).digest()
    sd[sidedata.SD_TEST2] = struct.pack('>32s', sha256)
    return orig(self, text, transaction, link, p1, p2, *args, **kwargs)


def wrap_revisiondata(orig, self, nodeorrev, *args, **kwargs):
    text, sd = orig(self, nodeorrev, *args, **kwargs)
    if getattr(self, 'sidedatanocheck', False):
        return text, sd
    if self.version & 0xFFFF != 2:
        return text, sd
    if nodeorrev != nullrev and nodeorrev != self.nullid:
        cat1 = sd.get(sidedata.SD_TEST1)
        if cat1 is not None and len(text) != struct.unpack('>I', cat1)[0]:
            raise RuntimeError('text size mismatch')
        expected = sd.get(sidedata.SD_TEST2)
        got = hashlib.sha256(text).digest()
        if expected is not None and got != expected:
            raise RuntimeError('sha256 mismatch')
    return text, sd


-def wrapgetsidedatacompanion(orig, srcrepo, dstrepo):
-    sidedatacompanion = orig(srcrepo, dstrepo)
+def wrapget_sidedata_helpers(orig, srcrepo, dstrepo):
+    repo, computers, removers = orig(srcrepo, dstrepo)
+    assert not computers and not removers  # deal with composition later
    addedreqs = dstrepo.requirements - srcrepo.requirements
+
    if requirements.SIDEDATA_REQUIREMENT in addedreqs:
-        assert sidedatacompanion is None  # deal with composition later

-        def sidedatacompanion(revlog, rev):
+        def computer(repo, revlog, rev, old_sidedata):
+            assert not old_sidedata  # not supported yet
            update = {}
            revlog.sidedatanocheck = True
            try:
                text = revlog.revision(rev)
            finally:
                del revlog.sidedatanocheck
            ## let's store some arbitrary data just for testing
            # text length
            update[sidedata.SD_TEST1] = struct.pack('>I', len(text))
            # and sha2 hashes
            sha256 = hashlib.sha256(text).digest()
            update[sidedata.SD_TEST2] = struct.pack('>32s', sha256)
-            return False, (), update, 0, 0
+            return update, (0, 0)

-    return sidedatacompanion
+        srcrepo.register_sidedata_computer(
+            constants.KIND_CHANGELOG,
+            b"whatever",
+            (sidedata.SD_TEST1, sidedata.SD_TEST2),
+            computer,
+            0,
+        )
+    dstrepo.register_wanted_sidedata(b"whatever")
+
+    return changegroup.get_sidedata_helpers(srcrepo, dstrepo._wanted_sidedata)


def extsetup(ui):
    extensions.wrapfunction(revlog.revlog, 'addrevision', wrapaddrevision)
    extensions.wrapfunction(revlog.revlog, '_revisiondata', wrap_revisiondata)
    extensions.wrapfunction(
-        upgrade_engine, 'getsidedatacompanion', wrapgetsidedatacompanion
+        upgrade_engine, 'get_sidedata_helpers', wrapget_sidedata_helpers
    )


def reposetup(ui, repo):
    # We don't register sidedata computers because we don't care within these
    # tests
    repo.register_wanted_sidedata(sidedata.SD_TEST1)
    repo.register_wanted_sidedata(sidedata.SD_TEST2)
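This diff shows the shape of the new mechanism the commit message refers to: instead of a single sidedatacompanion callback, producers register a sidedata computer per revlog kind and category, consumers declare the categories they want, and get_sidedata_helpers derives the work to do from the two registries. A condensed sketch of that flow, reusing names visible in the diff above (the computer signature and its (update, (flags_to_add, flags_to_remove)) return value come from the diff; the inline comments are inferred, not authoritative):

import struct

from mercurial.revlogutils import constants, sidedata


def text_length_computer(repo, revlog, rev, old_sidedata):
    # A computer receives any previously attached sidedata and returns the
    # categories it produces plus revision flags to add and to remove.
    text = revlog.revision(rev)
    return {sidedata.SD_TEST1: struct.pack('>I', len(text))}, (0, 0)


def setup(srcrepo, dstrepo):
    srcrepo.register_sidedata_computer(
        constants.KIND_CHANGELOG,  # which revlog kind this applies to
        b"whatever",  # free-form category name
        (sidedata.SD_TEST1,),  # sidedata keys the computer fills in
        text_length_computer,
        0,  # revision flags implied by this category
    )
    # The receiving side opts in per category; helpers are then built from
    # the intersection of wanted and registered categories.
    dstrepo.register_wanted_sidedata(b"whatever")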