unbundle: faster computation of changed heads...
Arseniy Alekseyev
r52288:a0d88b02 default
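The change below reworks the head bookkeeping in cg1unpacker.apply(): instead of snapshotting oldheads = set(cl.heads()) before the changegroup is applied and re-walking all heads afterwards, it records only the old revision count and asks the changelog for cl.diffheads(oldrevcount, newrevcount), which reports the heads removed and added by the newly appended revisions. The sketch below is only an illustration of that idea, not Mercurial's implementation: head_diff, its parents mapping, and the had_child_before helper are hypothetical names, and the linear had_child_before scan stands in for a lookup the real changelog index can answer much more cheaply.

    NULLREV = -1

    def head_diff(parents, old_count, new_count):
        """Return (heads_removed, heads_added) after appending revisions
        old_count..new_count-1 to a DAG described by parents: rev -> (p1, p2),
        with NULLREV standing for a missing parent.  Only the new revisions
        are inspected, which is the point of the optimization."""
        new_revs = range(old_count, new_count)

        # Any revision used as a parent by a newer revision has a child,
        # so it can no longer be a head.
        parents_of_new = set()
        for rev in new_revs:
            parents_of_new.update(p for p in parents[rev] if p != NULLREV)

        # Added heads: new revisions that no other new revision builds on.
        heads_added = [r for r in new_revs if r not in parents_of_new]

        # Removed heads: pre-existing revisions that just gained their first
        # child.  The "had no child before" test is what a real changelog
        # index can answer quickly; a plain scan is used here for clarity.
        def had_child_before(rev):
            return any(rev in parents[r] for r in range(rev + 1, old_count))

        heads_removed = sorted(
            p for p in parents_of_new if p < old_count and not had_child_before(p)
        )
        return heads_removed, heads_added

For example, with parents = {0: (-1, -1), 1: (0, -1), 2: (1, -1), 3: (1, -1)}, old_count=2 and new_count=4, the sketch returns ([1], [2, 3]): revision 1 stops being a head and the two new revisions become heads, mirroring how the new code adjusts deltaheads by len(heads_added) - len(heads_removed).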
@@ -1,2442 +1,2441 b''
1 # changegroup.py - Mercurial changegroup manipulation functions
1 # changegroup.py - Mercurial changegroup manipulation functions
2 #
2 #
3 # Copyright 2006 Olivia Mackall <olivia@selenic.com>
3 # Copyright 2006 Olivia Mackall <olivia@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8
8
9 import os
9 import os
10 import struct
10 import struct
11 import weakref
11 import weakref
12
12
13 from .i18n import _
13 from .i18n import _
14 from .node import (
14 from .node import (
15 hex,
15 hex,
16 nullrev,
16 nullrev,
17 short,
17 short,
18 )
18 )
19 from .pycompat import open
19 from .pycompat import open
20
20
21 from . import (
21 from . import (
22 error,
22 error,
23 match as matchmod,
23 match as matchmod,
24 mdiff,
24 mdiff,
25 phases,
25 phases,
26 pycompat,
26 pycompat,
27 requirements,
27 requirements,
28 scmutil,
28 scmutil,
29 util,
29 util,
30 )
30 )
31
31
32 from .interfaces import repository
32 from .interfaces import repository
33 from .revlogutils import sidedata as sidedatamod
33 from .revlogutils import sidedata as sidedatamod
34 from .revlogutils import constants as revlog_constants
34 from .revlogutils import constants as revlog_constants
35 from .utils import storageutil
35 from .utils import storageutil
36
36
37 _CHANGEGROUPV1_DELTA_HEADER = struct.Struct(b"20s20s20s20s")
37 _CHANGEGROUPV1_DELTA_HEADER = struct.Struct(b"20s20s20s20s")
38 _CHANGEGROUPV2_DELTA_HEADER = struct.Struct(b"20s20s20s20s20s")
38 _CHANGEGROUPV2_DELTA_HEADER = struct.Struct(b"20s20s20s20s20s")
39 _CHANGEGROUPV3_DELTA_HEADER = struct.Struct(b">20s20s20s20s20sH")
39 _CHANGEGROUPV3_DELTA_HEADER = struct.Struct(b">20s20s20s20s20sH")
40 _CHANGEGROUPV4_DELTA_HEADER = struct.Struct(b">B20s20s20s20s20sH")
40 _CHANGEGROUPV4_DELTA_HEADER = struct.Struct(b">B20s20s20s20s20sH")
41
41
42 LFS_REQUIREMENT = b'lfs'
42 LFS_REQUIREMENT = b'lfs'
43
43
44 readexactly = util.readexactly
44 readexactly = util.readexactly
45
45
46
46
47 def getchunk(stream):
47 def getchunk(stream):
48 """return the next chunk from stream as a string"""
48 """return the next chunk from stream as a string"""
49 d = readexactly(stream, 4)
49 d = readexactly(stream, 4)
50 l = struct.unpack(b">l", d)[0]
50 l = struct.unpack(b">l", d)[0]
51 if l <= 4:
51 if l <= 4:
52 if l:
52 if l:
53 raise error.Abort(_(b"invalid chunk length %d") % l)
53 raise error.Abort(_(b"invalid chunk length %d") % l)
54 return b""
54 return b""
55 return readexactly(stream, l - 4)
55 return readexactly(stream, l - 4)
56
56
57
57
58 def chunkheader(length):
58 def chunkheader(length):
59 """return a changegroup chunk header (string)"""
59 """return a changegroup chunk header (string)"""
60 return struct.pack(b">l", length + 4)
60 return struct.pack(b">l", length + 4)
61
61
62
62
63 def closechunk():
63 def closechunk():
64 """return a changegroup chunk header (string) for a zero-length chunk"""
64 """return a changegroup chunk header (string) for a zero-length chunk"""
65 return struct.pack(b">l", 0)
65 return struct.pack(b">l", 0)
66
66
67
67
68 def _fileheader(path):
68 def _fileheader(path):
69 """Obtain a changegroup chunk header for a named path."""
69 """Obtain a changegroup chunk header for a named path."""
70 return chunkheader(len(path)) + path
70 return chunkheader(len(path)) + path
71
71
72
72
73 def writechunks(ui, chunks, filename, vfs=None):
73 def writechunks(ui, chunks, filename, vfs=None):
74 """Write chunks to a file and return its filename.
74 """Write chunks to a file and return its filename.
75
75
76 The stream is assumed to be a bundle file.
76 The stream is assumed to be a bundle file.
77 Existing files will not be overwritten.
77 Existing files will not be overwritten.
78 If no filename is specified, a temporary file is created.
78 If no filename is specified, a temporary file is created.
79 """
79 """
80 fh = None
80 fh = None
81 cleanup = None
81 cleanup = None
82 try:
82 try:
83 if filename:
83 if filename:
84 if vfs:
84 if vfs:
85 fh = vfs.open(filename, b"wb")
85 fh = vfs.open(filename, b"wb")
86 else:
86 else:
87 # Increase default buffer size because default is usually
87 # Increase default buffer size because default is usually
88 # small (4k is common on Linux).
88 # small (4k is common on Linux).
89 fh = open(filename, b"wb", 131072)
89 fh = open(filename, b"wb", 131072)
90 else:
90 else:
91 fd, filename = pycompat.mkstemp(prefix=b"hg-bundle-", suffix=b".hg")
91 fd, filename = pycompat.mkstemp(prefix=b"hg-bundle-", suffix=b".hg")
92 fh = os.fdopen(fd, "wb")
92 fh = os.fdopen(fd, "wb")
93 cleanup = filename
93 cleanup = filename
94 for c in chunks:
94 for c in chunks:
95 fh.write(c)
95 fh.write(c)
96 cleanup = None
96 cleanup = None
97 return filename
97 return filename
98 finally:
98 finally:
99 if fh is not None:
99 if fh is not None:
100 fh.close()
100 fh.close()
101 if cleanup is not None:
101 if cleanup is not None:
102 if filename and vfs:
102 if filename and vfs:
103 vfs.unlink(cleanup)
103 vfs.unlink(cleanup)
104 else:
104 else:
105 os.unlink(cleanup)
105 os.unlink(cleanup)
106
106
107
107
108 def _dbg_ubdl_line(
108 def _dbg_ubdl_line(
109 ui,
109 ui,
110 indent,
110 indent,
111 key,
111 key,
112 base_value=None,
112 base_value=None,
113 percentage_base=None,
113 percentage_base=None,
114 percentage_key=None,
114 percentage_key=None,
115 ):
115 ):
116 """Print one line of debug_unbundle_debug_info"""
116 """Print one line of debug_unbundle_debug_info"""
117 line = b"DEBUG-UNBUNDLING: "
117 line = b"DEBUG-UNBUNDLING: "
118 line += b' ' * (2 * indent)
118 line += b' ' * (2 * indent)
119 key += b":"
119 key += b":"
120 padding = b''
120 padding = b''
121 if base_value is not None:
121 if base_value is not None:
122 assert len(key) + 1 + (2 * indent) <= _KEY_PART_WIDTH
122 assert len(key) + 1 + (2 * indent) <= _KEY_PART_WIDTH
123 line += key.ljust(_KEY_PART_WIDTH - (2 * indent))
123 line += key.ljust(_KEY_PART_WIDTH - (2 * indent))
124 if isinstance(base_value, float):
124 if isinstance(base_value, float):
125 line += b"%14.3f seconds" % base_value
125 line += b"%14.3f seconds" % base_value
126 else:
126 else:
127 line += b"%10d" % base_value
127 line += b"%10d" % base_value
128 padding = b' '
128 padding = b' '
129 else:
129 else:
130 line += key
130 line += key
131
131
132 if percentage_base is not None:
132 if percentage_base is not None:
133 line += padding
133 line += padding
134 padding = b''
134 padding = b''
135 assert base_value is not None
135 assert base_value is not None
136 percentage = base_value * 100 // percentage_base
136 percentage = base_value * 100 // percentage_base
137 if percentage_key is not None:
137 if percentage_key is not None:
138 line += b" (%3d%% of %s)" % (
138 line += b" (%3d%% of %s)" % (
139 percentage,
139 percentage,
140 percentage_key,
140 percentage_key,
141 )
141 )
142 else:
142 else:
143 line += b" (%3d%%)" % percentage
143 line += b" (%3d%%)" % percentage
144
144
145 line += b'\n'
145 line += b'\n'
146 ui.write_err(line)
146 ui.write_err(line)
147
147
148
148
149 def _sumf(items):
149 def _sumf(items):
150 # python < 3.8 does not support a `start=0.0` argument to sum
150 # python < 3.8 does not support a `start=0.0` argument to sum
151 # So we have to cheat a bit until we drop support for those versions
151 # So we have to cheat a bit until we drop support for those versions
152 if not items:
152 if not items:
153 return 0.0
153 return 0.0
154 return sum(items)
154 return sum(items)
155
155
156
156
157 def display_unbundle_debug_info(ui, debug_info):
157 def display_unbundle_debug_info(ui, debug_info):
158 """display an unbundling report from debug information"""
158 """display an unbundling report from debug information"""
159 cl_info = []
159 cl_info = []
160 mn_info = []
160 mn_info = []
161 fl_info = []
161 fl_info = []
162 _dispatch = [
162 _dispatch = [
163 (b'CHANGELOG:', cl_info),
163 (b'CHANGELOG:', cl_info),
164 (b'MANIFESTLOG:', mn_info),
164 (b'MANIFESTLOG:', mn_info),
165 (b'FILELOG:', fl_info),
165 (b'FILELOG:', fl_info),
166 ]
166 ]
167 for e in debug_info:
167 for e in debug_info:
168 for prefix, info in _dispatch:
168 for prefix, info in _dispatch:
169 if e["target-revlog"].startswith(prefix):
169 if e["target-revlog"].startswith(prefix):
170 info.append(e)
170 info.append(e)
171 break
171 break
172 else:
172 else:
173 assert False, 'unreachable'
173 assert False, 'unreachable'
174 each_info = [
174 each_info = [
175 (b'changelog', cl_info),
175 (b'changelog', cl_info),
176 (b'manifests', mn_info),
176 (b'manifests', mn_info),
177 (b'files', fl_info),
177 (b'files', fl_info),
178 ]
178 ]
179
179
180 # General Revision Counts
180 # General Revision Counts
181 _dbg_ubdl_line(ui, 0, b'revisions', len(debug_info))
181 _dbg_ubdl_line(ui, 0, b'revisions', len(debug_info))
182 for key, info in each_info:
182 for key, info in each_info:
183 if not info:
183 if not info:
184 continue
184 continue
185 _dbg_ubdl_line(ui, 1, key, len(info), len(debug_info))
185 _dbg_ubdl_line(ui, 1, key, len(info), len(debug_info))
186
186
187 # General Time spent
187 # General Time spent
188 all_durations = [e['duration'] for e in debug_info]
188 all_durations = [e['duration'] for e in debug_info]
189 all_durations.sort()
189 all_durations.sort()
190 total_duration = _sumf(all_durations)
190 total_duration = _sumf(all_durations)
191 _dbg_ubdl_line(ui, 0, b'total-time', total_duration)
191 _dbg_ubdl_line(ui, 0, b'total-time', total_duration)
192
192
193 for key, info in each_info:
193 for key, info in each_info:
194 if not info:
194 if not info:
195 continue
195 continue
196 durations = [e['duration'] for e in info]
196 durations = [e['duration'] for e in info]
197 durations.sort()
197 durations.sort()
198 _dbg_ubdl_line(ui, 1, key, _sumf(durations), total_duration)
198 _dbg_ubdl_line(ui, 1, key, _sumf(durations), total_duration)
199
199
200 # Count and cache reuse per delta type
200 # Count and cache reuse per delta type
201 each_types = {}
201 each_types = {}
202 for key, info in each_info:
202 for key, info in each_info:
203 each_types[key] = types = {
203 each_types[key] = types = {
204 b'full': 0,
204 b'full': 0,
205 b'full-cached': 0,
205 b'full-cached': 0,
206 b'snapshot': 0,
206 b'snapshot': 0,
207 b'snapshot-cached': 0,
207 b'snapshot-cached': 0,
208 b'delta': 0,
208 b'delta': 0,
209 b'delta-cached': 0,
209 b'delta-cached': 0,
210 b'unknown': 0,
210 b'unknown': 0,
211 b'unknown-cached': 0,
211 b'unknown-cached': 0,
212 }
212 }
213 for e in info:
213 for e in info:
214 types[e['type']] += 1
214 types[e['type']] += 1
215 if e['using-cached-base']:
215 if e['using-cached-base']:
216 types[e['type'] + b'-cached'] += 1
216 types[e['type'] + b'-cached'] += 1
217
217
218 EXPECTED_TYPES = (b'full', b'snapshot', b'delta', b'unknown')
218 EXPECTED_TYPES = (b'full', b'snapshot', b'delta', b'unknown')
219 if debug_info:
219 if debug_info:
220 _dbg_ubdl_line(ui, 0, b'type-count')
220 _dbg_ubdl_line(ui, 0, b'type-count')
221 for key, info in each_info:
221 for key, info in each_info:
222 if info:
222 if info:
223 _dbg_ubdl_line(ui, 1, key)
223 _dbg_ubdl_line(ui, 1, key)
224 t = each_types[key]
224 t = each_types[key]
225 for tn in EXPECTED_TYPES:
225 for tn in EXPECTED_TYPES:
226 if t[tn]:
226 if t[tn]:
227 tc = tn + b'-cached'
227 tc = tn + b'-cached'
228 _dbg_ubdl_line(ui, 2, tn, t[tn])
228 _dbg_ubdl_line(ui, 2, tn, t[tn])
229 _dbg_ubdl_line(ui, 3, b'cached', t[tc], t[tn])
229 _dbg_ubdl_line(ui, 3, b'cached', t[tc], t[tn])
230
230
231 # time spent per delta type and cache reuse
231 # time spent per delta type and cache reuse
232 each_type_time = {}
232 each_type_time = {}
233 for key, info in each_info:
233 for key, info in each_info:
234 each_type_time[key] = t = {
234 each_type_time[key] = t = {
235 b'full': [],
235 b'full': [],
236 b'full-cached': [],
236 b'full-cached': [],
237 b'snapshot': [],
237 b'snapshot': [],
238 b'snapshot-cached': [],
238 b'snapshot-cached': [],
239 b'delta': [],
239 b'delta': [],
240 b'delta-cached': [],
240 b'delta-cached': [],
241 b'unknown': [],
241 b'unknown': [],
242 b'unknown-cached': [],
242 b'unknown-cached': [],
243 }
243 }
244 for e in info:
244 for e in info:
245 t[e['type']].append(e['duration'])
245 t[e['type']].append(e['duration'])
246 if e['using-cached-base']:
246 if e['using-cached-base']:
247 t[e['type'] + b'-cached'].append(e['duration'])
247 t[e['type'] + b'-cached'].append(e['duration'])
248 for t_key, value in list(t.items()):
248 for t_key, value in list(t.items()):
249 value.sort()
249 value.sort()
250 t[t_key] = _sumf(value)
250 t[t_key] = _sumf(value)
251
251
252 if debug_info:
252 if debug_info:
253 _dbg_ubdl_line(ui, 0, b'type-time')
253 _dbg_ubdl_line(ui, 0, b'type-time')
254 for key, info in each_info:
254 for key, info in each_info:
255 if info:
255 if info:
256 _dbg_ubdl_line(ui, 1, key)
256 _dbg_ubdl_line(ui, 1, key)
257 t = each_type_time[key]
257 t = each_type_time[key]
258 td = total_duration # to save space on next lines
258 td = total_duration # to save space on next lines
259 for tn in EXPECTED_TYPES:
259 for tn in EXPECTED_TYPES:
260 if t[tn]:
260 if t[tn]:
261 tc = tn + b'-cached'
261 tc = tn + b'-cached'
262 _dbg_ubdl_line(ui, 2, tn, t[tn], td, b"total")
262 _dbg_ubdl_line(ui, 2, tn, t[tn], td, b"total")
263 _dbg_ubdl_line(ui, 3, b'cached', t[tc], td, b"total")
263 _dbg_ubdl_line(ui, 3, b'cached', t[tc], td, b"total")
264
264
265
265
266 class cg1unpacker:
266 class cg1unpacker:
267 """Unpacker for cg1 changegroup streams.
267 """Unpacker for cg1 changegroup streams.
268
268
269 A changegroup unpacker handles the framing of the revision data in
269 A changegroup unpacker handles the framing of the revision data in
270 the wire format. Most consumers will want to use the apply()
270 the wire format. Most consumers will want to use the apply()
271 method to add the changes from the changegroup to a repository.
271 method to add the changes from the changegroup to a repository.
272
272
273 If you're forwarding a changegroup unmodified to another consumer,
273 If you're forwarding a changegroup unmodified to another consumer,
274 use getchunks(), which returns an iterator of changegroup
274 use getchunks(), which returns an iterator of changegroup
275 chunks. This is mostly useful for cases where you need to know the
275 chunks. This is mostly useful for cases where you need to know the
276 data stream has ended by observing the end of the changegroup.
276 data stream has ended by observing the end of the changegroup.
277
277
278 deltachunk() is useful only if you're applying delta data. Most
278 deltachunk() is useful only if you're applying delta data. Most
279 consumers should prefer apply() instead.
279 consumers should prefer apply() instead.
280
280
281 A few other public methods exist. Those are used only for
281 A few other public methods exist. Those are used only for
282 bundlerepo and some debug commands - their use is discouraged.
282 bundlerepo and some debug commands - their use is discouraged.
283 """
283 """
284
284
285 deltaheader = _CHANGEGROUPV1_DELTA_HEADER
285 deltaheader = _CHANGEGROUPV1_DELTA_HEADER
286 deltaheadersize = deltaheader.size
286 deltaheadersize = deltaheader.size
287 version = b'01'
287 version = b'01'
288 _grouplistcount = 1 # One list of files after the manifests
288 _grouplistcount = 1 # One list of files after the manifests
289
289
290 def __init__(self, fh, alg, extras=None):
290 def __init__(self, fh, alg, extras=None):
291 if alg is None:
291 if alg is None:
292 alg = b'UN'
292 alg = b'UN'
293 if alg not in util.compengines.supportedbundletypes:
293 if alg not in util.compengines.supportedbundletypes:
294 raise error.Abort(_(b'unknown stream compression type: %s') % alg)
294 raise error.Abort(_(b'unknown stream compression type: %s') % alg)
295 if alg == b'BZ':
295 if alg == b'BZ':
296 alg = b'_truncatedBZ'
296 alg = b'_truncatedBZ'
297
297
298 compengine = util.compengines.forbundletype(alg)
298 compengine = util.compengines.forbundletype(alg)
299 self._stream = compengine.decompressorreader(fh)
299 self._stream = compengine.decompressorreader(fh)
300 self._type = alg
300 self._type = alg
301 self.extras = extras or {}
301 self.extras = extras or {}
302 self.callback = None
302 self.callback = None
303
303
304 # These methods (compressed, read, seek, tell) all appear to only
304 # These methods (compressed, read, seek, tell) all appear to only
305 # be used by bundlerepo, but it's a little hard to tell.
305 # be used by bundlerepo, but it's a little hard to tell.
306 def compressed(self):
306 def compressed(self):
307 return self._type is not None and self._type != b'UN'
307 return self._type is not None and self._type != b'UN'
308
308
309 def read(self, l):
309 def read(self, l):
310 return self._stream.read(l)
310 return self._stream.read(l)
311
311
312 def seek(self, pos):
312 def seek(self, pos):
313 return self._stream.seek(pos)
313 return self._stream.seek(pos)
314
314
315 def tell(self):
315 def tell(self):
316 return self._stream.tell()
316 return self._stream.tell()
317
317
318 def close(self):
318 def close(self):
319 return self._stream.close()
319 return self._stream.close()
320
320
321 def _chunklength(self):
321 def _chunklength(self):
322 d = readexactly(self._stream, 4)
322 d = readexactly(self._stream, 4)
323 l = struct.unpack(b">l", d)[0]
323 l = struct.unpack(b">l", d)[0]
324 if l <= 4:
324 if l <= 4:
325 if l:
325 if l:
326 raise error.Abort(_(b"invalid chunk length %d") % l)
326 raise error.Abort(_(b"invalid chunk length %d") % l)
327 return 0
327 return 0
328 if self.callback:
328 if self.callback:
329 self.callback()
329 self.callback()
330 return l - 4
330 return l - 4
331
331
332 def changelogheader(self):
332 def changelogheader(self):
333 """v10 does not have a changelog header chunk"""
333 """v10 does not have a changelog header chunk"""
334 return {}
334 return {}
335
335
336 def manifestheader(self):
336 def manifestheader(self):
337 """v10 does not have a manifest header chunk"""
337 """v10 does not have a manifest header chunk"""
338 return {}
338 return {}
339
339
340 def filelogheader(self):
340 def filelogheader(self):
341 """return the header of the filelogs chunk, v10 only has the filename"""
341 """return the header of the filelogs chunk, v10 only has the filename"""
342 l = self._chunklength()
342 l = self._chunklength()
343 if not l:
343 if not l:
344 return {}
344 return {}
345 fname = readexactly(self._stream, l)
345 fname = readexactly(self._stream, l)
346 return {b'filename': fname}
346 return {b'filename': fname}
347
347
348 def _deltaheader(self, headertuple, prevnode):
348 def _deltaheader(self, headertuple, prevnode):
349 node, p1, p2, cs = headertuple
349 node, p1, p2, cs = headertuple
350 if prevnode is None:
350 if prevnode is None:
351 deltabase = p1
351 deltabase = p1
352 else:
352 else:
353 deltabase = prevnode
353 deltabase = prevnode
354 flags = 0
354 flags = 0
355 protocol_flags = 0
355 protocol_flags = 0
356 return node, p1, p2, deltabase, cs, flags, protocol_flags
356 return node, p1, p2, deltabase, cs, flags, protocol_flags
357
357
358 def deltachunk(self, prevnode):
358 def deltachunk(self, prevnode):
359 # Chunkdata: (node, p1, p2, cs, deltabase, delta, flags, sidedata, proto_flags)
359 # Chunkdata: (node, p1, p2, cs, deltabase, delta, flags, sidedata, proto_flags)
360 l = self._chunklength()
360 l = self._chunklength()
361 if not l:
361 if not l:
362 return {}
362 return {}
363 headerdata = readexactly(self._stream, self.deltaheadersize)
363 headerdata = readexactly(self._stream, self.deltaheadersize)
364 header = self.deltaheader.unpack(headerdata)
364 header = self.deltaheader.unpack(headerdata)
365 delta = readexactly(self._stream, l - self.deltaheadersize)
365 delta = readexactly(self._stream, l - self.deltaheadersize)
366 header = self._deltaheader(header, prevnode)
366 header = self._deltaheader(header, prevnode)
367 node, p1, p2, deltabase, cs, flags, protocol_flags = header
367 node, p1, p2, deltabase, cs, flags, protocol_flags = header
368 return node, p1, p2, cs, deltabase, delta, flags, {}, protocol_flags
368 return node, p1, p2, cs, deltabase, delta, flags, {}, protocol_flags
369
369
370 def getchunks(self):
370 def getchunks(self):
371 """returns all the chunks contains in the bundle
371 """returns all the chunks contains in the bundle
372
372
373 Used when you need to forward the binary stream to a file or another
373 Used when you need to forward the binary stream to a file or another
374 network API. To do so, it parses the changegroup data, otherwise it will
374 network API. To do so, it parses the changegroup data, otherwise it will
375 block in case of sshrepo because it doesn't know the end of the stream.
375 block in case of sshrepo because it doesn't know the end of the stream.
376 """
376 """
377 # For changegroup 1 and 2, we expect 3 parts: changelog, manifestlog,
377 # For changegroup 1 and 2, we expect 3 parts: changelog, manifestlog,
378 # and a list of filelogs. For changegroup 3, we expect 4 parts:
378 # and a list of filelogs. For changegroup 3, we expect 4 parts:
379 # changelog, manifestlog, a list of tree manifestlogs, and a list of
379 # changelog, manifestlog, a list of tree manifestlogs, and a list of
380 # filelogs.
380 # filelogs.
381 #
381 #
382 # Changelog and manifestlog parts are terminated with empty chunks. The
382 # Changelog and manifestlog parts are terminated with empty chunks. The
383 # tree and file parts are a list of entry sections. Each entry section
383 # tree and file parts are a list of entry sections. Each entry section
384 # is a series of chunks terminating in an empty chunk. The list of these
384 # is a series of chunks terminating in an empty chunk. The list of these
385 # entry sections is terminated in yet another empty chunk, so we know
385 # entry sections is terminated in yet another empty chunk, so we know
386 # we've reached the end of the tree/file list when we reach an empty
386 # we've reached the end of the tree/file list when we reach an empty
387 # chunk that was preceded by no non-empty chunks.
387 # chunk that was preceded by no non-empty chunks.
388
388
389 parts = 0
389 parts = 0
390 while parts < 2 + self._grouplistcount:
390 while parts < 2 + self._grouplistcount:
391 noentries = True
391 noentries = True
392 while True:
392 while True:
393 chunk = getchunk(self)
393 chunk = getchunk(self)
394 if not chunk:
394 if not chunk:
395 # The first two empty chunks represent the end of the
395 # The first two empty chunks represent the end of the
396 # changelog and the manifestlog portions. The remaining
396 # changelog and the manifestlog portions. The remaining
397 # empty chunks represent either A) the end of individual
397 # empty chunks represent either A) the end of individual
398 # tree or file entries in the file list, or B) the end of
398 # tree or file entries in the file list, or B) the end of
399 # the entire list. It's the end of the entire list if there
399 # the entire list. It's the end of the entire list if there
400 # were no entries (i.e. noentries is True).
400 # were no entries (i.e. noentries is True).
401 if parts < 2:
401 if parts < 2:
402 parts += 1
402 parts += 1
403 elif noentries:
403 elif noentries:
404 parts += 1
404 parts += 1
405 break
405 break
406 noentries = False
406 noentries = False
407 yield chunkheader(len(chunk))
407 yield chunkheader(len(chunk))
408 pos = 0
408 pos = 0
409 while pos < len(chunk):
409 while pos < len(chunk):
410 next = pos + 2 ** 20
410 next = pos + 2 ** 20
411 yield chunk[pos:next]
411 yield chunk[pos:next]
412 pos = next
412 pos = next
413 yield closechunk()
413 yield closechunk()
414
414
415 def _unpackmanifests(
415 def _unpackmanifests(
416 self,
416 self,
417 repo,
417 repo,
418 revmap,
418 revmap,
419 trp,
419 trp,
420 prog,
420 prog,
421 addrevisioncb=None,
421 addrevisioncb=None,
422 debug_info=None,
422 debug_info=None,
423 delta_base_reuse_policy=None,
423 delta_base_reuse_policy=None,
424 ):
424 ):
425 self.callback = prog.increment
425 self.callback = prog.increment
426 # no need to check for empty manifest group here:
426 # no need to check for empty manifest group here:
427 # if the result of the merge of 1 and 2 is the same in 3 and 4,
427 # if the result of the merge of 1 and 2 is the same in 3 and 4,
428 # no new manifest will be created and the manifest group will
428 # no new manifest will be created and the manifest group will
429 # be empty during the pull
429 # be empty during the pull
430 self.manifestheader()
430 self.manifestheader()
431 deltas = self.deltaiter()
431 deltas = self.deltaiter()
432 storage = repo.manifestlog.getstorage(b'')
432 storage = repo.manifestlog.getstorage(b'')
433 storage.addgroup(
433 storage.addgroup(
434 deltas,
434 deltas,
435 revmap,
435 revmap,
436 trp,
436 trp,
437 addrevisioncb=addrevisioncb,
437 addrevisioncb=addrevisioncb,
438 debug_info=debug_info,
438 debug_info=debug_info,
439 delta_base_reuse_policy=delta_base_reuse_policy,
439 delta_base_reuse_policy=delta_base_reuse_policy,
440 )
440 )
441 prog.complete()
441 prog.complete()
442 self.callback = None
442 self.callback = None
443
443
444 def apply(
444 def apply(
445 self,
445 self,
446 repo,
446 repo,
447 tr,
447 tr,
448 srctype,
448 srctype,
449 url,
449 url,
450 targetphase=phases.draft,
450 targetphase=phases.draft,
451 expectedtotal=None,
451 expectedtotal=None,
452 sidedata_categories=None,
452 sidedata_categories=None,
453 delta_base_reuse_policy=None,
453 delta_base_reuse_policy=None,
454 ):
454 ):
455 """Add the changegroup returned by source.read() to this repo.
455 """Add the changegroup returned by source.read() to this repo.
456 srctype is a string like 'push', 'pull', or 'unbundle'. url is
456 srctype is a string like 'push', 'pull', or 'unbundle'. url is
457 the URL of the repo where this changegroup is coming from.
457 the URL of the repo where this changegroup is coming from.
458
458
459 Return an integer summarizing the change to this repo:
459 Return an integer summarizing the change to this repo:
460 - nothing changed or no source: 0
460 - nothing changed or no source: 0
461 - more heads than before: 1+added heads (2..n)
461 - more heads than before: 1+added heads (2..n)
462 - fewer heads than before: -1-removed heads (-2..-n)
462 - fewer heads than before: -1-removed heads (-2..-n)
463 - number of heads stays the same: 1
463 - number of heads stays the same: 1
464
464
465 `sidedata_categories` is an optional set of the remote's sidedata wanted
465 `sidedata_categories` is an optional set of the remote's sidedata wanted
466 categories.
466 categories.
467
467
468 `delta_base_reuse_policy` is an optional argument, when set to a value
468 `delta_base_reuse_policy` is an optional argument, when set to a value
469 it will control the way the deltas contained in the bundle are reused
469 it will control the way the deltas contained in the bundle are reused
470 when applied in the revlog.
470 when applied in the revlog.
471
471
472 See `DELTA_BASE_REUSE_*` entry in mercurial.revlogutils.constants.
472 See `DELTA_BASE_REUSE_*` entry in mercurial.revlogutils.constants.
473 """
473 """
474 repo = repo.unfiltered()
474 repo = repo.unfiltered()
475
475
476 debug_info = None
476 debug_info = None
477 if repo.ui.configbool(b'debug', b'unbundling-stats'):
477 if repo.ui.configbool(b'debug', b'unbundling-stats'):
478 debug_info = []
478 debug_info = []
479
479
480 # Only useful if we're adding sidedata categories. If both peers have
480 # Only useful if we're adding sidedata categories. If both peers have
481 # the same categories, then we simply don't do anything.
481 # the same categories, then we simply don't do anything.
482 adding_sidedata = (
482 adding_sidedata = (
483 (
483 (
484 requirements.REVLOGV2_REQUIREMENT in repo.requirements
484 requirements.REVLOGV2_REQUIREMENT in repo.requirements
485 or requirements.CHANGELOGV2_REQUIREMENT in repo.requirements
485 or requirements.CHANGELOGV2_REQUIREMENT in repo.requirements
486 )
486 )
487 and self.version == b'04'
487 and self.version == b'04'
488 and srctype == b'pull'
488 and srctype == b'pull'
489 )
489 )
490 if adding_sidedata:
490 if adding_sidedata:
491 sidedata_helpers = sidedatamod.get_sidedata_helpers(
491 sidedata_helpers = sidedatamod.get_sidedata_helpers(
492 repo,
492 repo,
493 sidedata_categories or set(),
493 sidedata_categories or set(),
494 pull=True,
494 pull=True,
495 )
495 )
496 else:
496 else:
497 sidedata_helpers = None
497 sidedata_helpers = None
498
498
499 def csmap(x):
499 def csmap(x):
500 repo.ui.debug(b"add changeset %s\n" % short(x))
500 repo.ui.debug(b"add changeset %s\n" % short(x))
501 return len(cl)
501 return len(cl)
502
502
503 def revmap(x):
503 def revmap(x):
504 return cl.rev(x)
504 return cl.rev(x)
505
505
506 try:
506 try:
507 # The transaction may already carry source information. In this
507 # The transaction may already carry source information. In this
508 # case we use the top level data. We overwrite the argument
508 # case we use the top level data. We overwrite the argument
509 # because we need to use the top level values (if they exist)
509 # because we need to use the top level values (if they exist)
510 # in this function.
510 # in this function.
511 srctype = tr.hookargs.setdefault(b'source', srctype)
511 srctype = tr.hookargs.setdefault(b'source', srctype)
512 tr.hookargs.setdefault(b'url', url)
512 tr.hookargs.setdefault(b'url', url)
513 repo.hook(
513 repo.hook(
514 b'prechangegroup', throw=True, **pycompat.strkwargs(tr.hookargs)
514 b'prechangegroup', throw=True, **pycompat.strkwargs(tr.hookargs)
515 )
515 )
516
516
517 # write changelog data to temp files so concurrent readers
517 # write changelog data to temp files so concurrent readers
518 # will not see an inconsistent view
518 # will not see an inconsistent view
519 cl = repo.changelog
519 cl = repo.changelog
520 cl.delayupdate(tr)
520 cl.delayupdate(tr)
521 oldheads = set(cl.heads())
521 oldrevcount = len(cl)
522
522
523 trp = weakref.proxy(tr)
523 trp = weakref.proxy(tr)
524 # pull off the changeset group
524 # pull off the changeset group
525 repo.ui.status(_(b"adding changesets\n"))
525 repo.ui.status(_(b"adding changesets\n"))
526 clstart = len(cl)
526 clstart = len(cl)
527 progress = repo.ui.makeprogress(
527 progress = repo.ui.makeprogress(
528 _(b'changesets'), unit=_(b'chunks'), total=expectedtotal
528 _(b'changesets'), unit=_(b'chunks'), total=expectedtotal
529 )
529 )
530 self.callback = progress.increment
530 self.callback = progress.increment
531
531
532 efilesset = set()
532 efilesset = set()
533 duprevs = []
533 duprevs = []
534
534
535 def ondupchangelog(cl, rev):
535 def ondupchangelog(cl, rev):
536 if rev < clstart:
536 if rev < clstart:
537 duprevs.append(rev) # pytype: disable=attribute-error
537 duprevs.append(rev) # pytype: disable=attribute-error
538
538
539 def onchangelog(cl, rev):
539 def onchangelog(cl, rev):
540 ctx = cl.changelogrevision(rev)
540 ctx = cl.changelogrevision(rev)
541 assert efilesset is not None # help pytype
541 assert efilesset is not None # help pytype
542 efilesset.update(ctx.files)
542 efilesset.update(ctx.files)
543 repo.register_changeset(rev, ctx)
543 repo.register_changeset(rev, ctx)
544
544
545 self.changelogheader()
545 self.changelogheader()
546 deltas = self.deltaiter()
546 deltas = self.deltaiter()
547 if not cl.addgroup(
547 if not cl.addgroup(
548 deltas,
548 deltas,
549 csmap,
549 csmap,
550 trp,
550 trp,
551 alwayscache=True,
551 alwayscache=True,
552 addrevisioncb=onchangelog,
552 addrevisioncb=onchangelog,
553 duplicaterevisioncb=ondupchangelog,
553 duplicaterevisioncb=ondupchangelog,
554 debug_info=debug_info,
554 debug_info=debug_info,
555 delta_base_reuse_policy=delta_base_reuse_policy,
555 delta_base_reuse_policy=delta_base_reuse_policy,
556 ):
556 ):
557 repo.ui.develwarn(
557 repo.ui.develwarn(
558 b'applied empty changelog from changegroup',
558 b'applied empty changelog from changegroup',
559 config=b'warn-empty-changegroup',
559 config=b'warn-empty-changegroup',
560 )
560 )
561 efiles = len(efilesset)
561 efiles = len(efilesset)
562 clend = len(cl)
562 clend = len(cl)
563 changesets = clend - clstart
563 changesets = clend - clstart
564 progress.complete()
564 progress.complete()
565 del deltas
565 del deltas
566 # TODO Python 2.7 removal
566 # TODO Python 2.7 removal
567 # del efilesset
567 # del efilesset
568 efilesset = None
568 efilesset = None
569 self.callback = None
569 self.callback = None
570
570
571 # Keep track of the (non-changelog) revlogs we've updated and their
571 # Keep track of the (non-changelog) revlogs we've updated and their
572 # range of new revisions for sidedata rewrite.
572 # range of new revisions for sidedata rewrite.
573 # TODO do something more efficient than keeping the reference to
573 # TODO do something more efficient than keeping the reference to
574 # the revlogs, especially memory-wise.
574 # the revlogs, especially memory-wise.
575 touched_manifests = {}
575 touched_manifests = {}
576 touched_filelogs = {}
576 touched_filelogs = {}
577
577
578 # pull off the manifest group
578 # pull off the manifest group
579 repo.ui.status(_(b"adding manifests\n"))
579 repo.ui.status(_(b"adding manifests\n"))
580 # We know that we'll never have more manifests than we had
580 # We know that we'll never have more manifests than we had
581 # changesets.
581 # changesets.
582 progress = repo.ui.makeprogress(
582 progress = repo.ui.makeprogress(
583 _(b'manifests'), unit=_(b'chunks'), total=changesets
583 _(b'manifests'), unit=_(b'chunks'), total=changesets
584 )
584 )
585 on_manifest_rev = None
585 on_manifest_rev = None
586 if sidedata_helpers:
586 if sidedata_helpers:
587 if revlog_constants.KIND_MANIFESTLOG in sidedata_helpers[1]:
587 if revlog_constants.KIND_MANIFESTLOG in sidedata_helpers[1]:
588
588
589 def on_manifest_rev(manifest, rev):
589 def on_manifest_rev(manifest, rev):
590 range = touched_manifests.get(manifest)
590 range = touched_manifests.get(manifest)
591 if not range:
591 if not range:
592 touched_manifests[manifest] = (rev, rev)
592 touched_manifests[manifest] = (rev, rev)
593 else:
593 else:
594 assert rev == range[1] + 1
594 assert rev == range[1] + 1
595 touched_manifests[manifest] = (range[0], rev)
595 touched_manifests[manifest] = (range[0], rev)
596
596
597 self._unpackmanifests(
597 self._unpackmanifests(
598 repo,
598 repo,
599 revmap,
599 revmap,
600 trp,
600 trp,
601 progress,
601 progress,
602 addrevisioncb=on_manifest_rev,
602 addrevisioncb=on_manifest_rev,
603 debug_info=debug_info,
603 debug_info=debug_info,
604 delta_base_reuse_policy=delta_base_reuse_policy,
604 delta_base_reuse_policy=delta_base_reuse_policy,
605 )
605 )
606
606
607 needfiles = {}
607 needfiles = {}
608 if repo.ui.configbool(b'server', b'validate'):
608 if repo.ui.configbool(b'server', b'validate'):
609 cl = repo.changelog
609 cl = repo.changelog
610 ml = repo.manifestlog
610 ml = repo.manifestlog
611 # validate incoming csets have their manifests
611 # validate incoming csets have their manifests
612 for cset in range(clstart, clend):
612 for cset in range(clstart, clend):
613 mfnode = cl.changelogrevision(cset).manifest
613 mfnode = cl.changelogrevision(cset).manifest
614 mfest = ml[mfnode].readdelta()
614 mfest = ml[mfnode].readdelta()
615 # store file nodes we must see
615 # store file nodes we must see
616 for f, n in mfest.items():
616 for f, n in mfest.items():
617 needfiles.setdefault(f, set()).add(n)
617 needfiles.setdefault(f, set()).add(n)
618
618
619 on_filelog_rev = None
619 on_filelog_rev = None
620 if sidedata_helpers:
620 if sidedata_helpers:
621 if revlog_constants.KIND_FILELOG in sidedata_helpers[1]:
621 if revlog_constants.KIND_FILELOG in sidedata_helpers[1]:
622
622
623 def on_filelog_rev(filelog, rev):
623 def on_filelog_rev(filelog, rev):
624 range = touched_filelogs.get(filelog)
624 range = touched_filelogs.get(filelog)
625 if not range:
625 if not range:
626 touched_filelogs[filelog] = (rev, rev)
626 touched_filelogs[filelog] = (rev, rev)
627 else:
627 else:
628 assert rev == range[1] + 1
628 assert rev == range[1] + 1
629 touched_filelogs[filelog] = (range[0], rev)
629 touched_filelogs[filelog] = (range[0], rev)
630
630
631 # process the files
631 # process the files
632 repo.ui.status(_(b"adding file changes\n"))
632 repo.ui.status(_(b"adding file changes\n"))
633 newrevs, newfiles = _addchangegroupfiles(
633 newrevs, newfiles = _addchangegroupfiles(
634 repo,
634 repo,
635 self,
635 self,
636 revmap,
636 revmap,
637 trp,
637 trp,
638 efiles,
638 efiles,
639 needfiles,
639 needfiles,
640 addrevisioncb=on_filelog_rev,
640 addrevisioncb=on_filelog_rev,
641 debug_info=debug_info,
641 debug_info=debug_info,
642 delta_base_reuse_policy=delta_base_reuse_policy,
642 delta_base_reuse_policy=delta_base_reuse_policy,
643 )
643 )
644
644
645 if sidedata_helpers:
645 if sidedata_helpers:
646 if revlog_constants.KIND_CHANGELOG in sidedata_helpers[1]:
646 if revlog_constants.KIND_CHANGELOG in sidedata_helpers[1]:
647 cl.rewrite_sidedata(
647 cl.rewrite_sidedata(
648 trp, sidedata_helpers, clstart, clend - 1
648 trp, sidedata_helpers, clstart, clend - 1
649 )
649 )
650 for mf, (startrev, endrev) in touched_manifests.items():
650 for mf, (startrev, endrev) in touched_manifests.items():
651 mf.rewrite_sidedata(trp, sidedata_helpers, startrev, endrev)
651 mf.rewrite_sidedata(trp, sidedata_helpers, startrev, endrev)
652 for fl, (startrev, endrev) in touched_filelogs.items():
652 for fl, (startrev, endrev) in touched_filelogs.items():
653 fl.rewrite_sidedata(trp, sidedata_helpers, startrev, endrev)
653 fl.rewrite_sidedata(trp, sidedata_helpers, startrev, endrev)
654
654
655 # making sure the value exists
655 # making sure the value exists
656 tr.changes.setdefault(b'changegroup-count-changesets', 0)
656 tr.changes.setdefault(b'changegroup-count-changesets', 0)
657 tr.changes.setdefault(b'changegroup-count-revisions', 0)
657 tr.changes.setdefault(b'changegroup-count-revisions', 0)
658 tr.changes.setdefault(b'changegroup-count-files', 0)
658 tr.changes.setdefault(b'changegroup-count-files', 0)
659 tr.changes.setdefault(b'changegroup-count-heads', 0)
659 tr.changes.setdefault(b'changegroup-count-heads', 0)
660
660
661 # some code uses bundle operations for internal purposes. They usually
661 # some code uses bundle operations for internal purposes. They usually
662 # set `ui.quiet` to do this outside of user sight. Since the report
662 # set `ui.quiet` to do this outside of user sight. Since the report
663 # of such operations now happens at the end of the transaction,
663 # of such operations now happens at the end of the transaction,
664 # ui.quiet has no direct effect on the output.
664 # ui.quiet has no direct effect on the output.
665 #
665 #
666 # To preserve this intent we use an inelegant hack: we fail to report
666 # To preserve this intent we use an inelegant hack: we fail to report
667 # the change if `quiet` is set. We should probably move to
667 # the change if `quiet` is set. We should probably move to
668 # something better, but this is a good first step to allow the "end
668 # something better, but this is a good first step to allow the "end
669 # of transaction report" to pass tests.
669 # of transaction report" to pass tests.
670 if not repo.ui.quiet:
670 if not repo.ui.quiet:
671 tr.changes[b'changegroup-count-changesets'] += changesets
671 tr.changes[b'changegroup-count-changesets'] += changesets
672 tr.changes[b'changegroup-count-revisions'] += newrevs
672 tr.changes[b'changegroup-count-revisions'] += newrevs
673 tr.changes[b'changegroup-count-files'] += newfiles
673 tr.changes[b'changegroup-count-files'] += newfiles
674
674
675 deltaheads = 0
675 deltaheads = 0
676 if oldheads:
676 newrevcount = len(cl)
677 heads = cl.heads()
677 heads_removed, heads_added = cl.diffheads(oldrevcount, newrevcount)
678 deltaheads += len(heads) - len(oldheads)
678 deltaheads += len(heads_added) - len(heads_removed)
679 for h in heads:
679 for h in heads_added:
680 if h not in oldheads and repo[h].closesbranch():
680 if repo[h].closesbranch():
681 deltaheads -= 1
681 deltaheads -= 1
682
682
683 # see previous comment about checking ui.quiet
683 # see previous comment about checking ui.quiet
684 if not repo.ui.quiet:
684 if not repo.ui.quiet:
685 tr.changes[b'changegroup-count-heads'] += deltaheads
685 tr.changes[b'changegroup-count-heads'] += deltaheads
686 repo.invalidatevolatilesets()
686 repo.invalidatevolatilesets()
687
687
688 if changesets > 0:
688 if changesets > 0:
689 if b'node' not in tr.hookargs:
689 if b'node' not in tr.hookargs:
690 tr.hookargs[b'node'] = hex(cl.node(clstart))
690 tr.hookargs[b'node'] = hex(cl.node(clstart))
691 tr.hookargs[b'node_last'] = hex(cl.node(clend - 1))
691 tr.hookargs[b'node_last'] = hex(cl.node(clend - 1))
692 hookargs = dict(tr.hookargs)
692 hookargs = dict(tr.hookargs)
693 else:
693 else:
694 hookargs = dict(tr.hookargs)
694 hookargs = dict(tr.hookargs)
695 hookargs[b'node'] = hex(cl.node(clstart))
695 hookargs[b'node'] = hex(cl.node(clstart))
696 hookargs[b'node_last'] = hex(cl.node(clend - 1))
696 hookargs[b'node_last'] = hex(cl.node(clend - 1))
697 repo.hook(
697 repo.hook(
698 b'pretxnchangegroup',
698 b'pretxnchangegroup',
699 throw=True,
699 throw=True,
700 **pycompat.strkwargs(hookargs)
700 **pycompat.strkwargs(hookargs)
701 )
701 )
702
702
703 added = range(clstart, clend)
703 added = range(clstart, clend)
704 phaseall = None
704 phaseall = None
705 if srctype in (b'push', b'serve'):
705 if srctype in (b'push', b'serve'):
706 # Old servers can not push the boundary themselves.
706 # Old servers can not push the boundary themselves.
707 # New servers won't push the boundary if changeset already
707 # New servers won't push the boundary if changeset already
708 # exists locally as secret
708 # exists locally as secret
709 #
709 #
710 # We should not use added here but the list of all changes in
710 # We should not use added here but the list of all changes in
711 # the bundle
711 # the bundle
712 if repo.publishing():
712 if repo.publishing():
713 targetphase = phaseall = phases.public
713 targetphase = phaseall = phases.public
714 else:
714 else:
715 # closer target phase computation
715 # closer target phase computation
716
716
717 # Those changesets have been pushed from the
717 # Those changesets have been pushed from the
718 # outside, their phases are going to be pushed
718 # outside, their phases are going to be pushed
719 # alongside. Therefore `targetphase` is
719 # alongside. Therefore `targetphase` is
720 # ignored.
720 # ignored.
721 targetphase = phaseall = phases.draft
721 targetphase = phaseall = phases.draft
722 if added:
722 if added:
723 phases.registernew(repo, tr, targetphase, added)
723 phases.registernew(repo, tr, targetphase, added)
724 if phaseall is not None:
724 if phaseall is not None:
725 if duprevs:
725 if duprevs:
726 duprevs.extend(added)
726 duprevs.extend(added)
727 else:
727 else:
728 duprevs = added
728 duprevs = added
729 phases.advanceboundary(repo, tr, phaseall, [], revs=duprevs)
729 phases.advanceboundary(repo, tr, phaseall, [], revs=duprevs)
730 duprevs = []
730 duprevs = []
731
731
732 if changesets > 0:
732 if changesets > 0:
733
733
734 def runhooks(unused_success):
734 def runhooks(unused_success):
735 # These hooks run when the lock releases, not when the
735 # These hooks run when the lock releases, not when the
736 # transaction closes. So it's possible for the changelog
736 # transaction closes. So it's possible for the changelog
737 # to have changed since we last saw it.
737 # to have changed since we last saw it.
738 if clstart >= len(repo):
738 if clstart >= len(repo):
739 return
739 return
740
740
741 repo.hook(b"changegroup", **pycompat.strkwargs(hookargs))
741 repo.hook(b"changegroup", **pycompat.strkwargs(hookargs))
742
742
743 for rev in added:
743 for rev in added:
744 args = hookargs.copy()
744 args = hookargs.copy()
745 args[b'node'] = hex(cl.node(rev))
745 args[b'node'] = hex(cl.node(rev))
746 del args[b'node_last']
746 del args[b'node_last']
747 repo.hook(b"incoming", **pycompat.strkwargs(args))
747 repo.hook(b"incoming", **pycompat.strkwargs(args))
748
748
749 newheads = [h for h in repo.heads() if h not in oldheads]
750 repo.ui.log(
749 repo.ui.log(
751 b"incoming",
750 b"incoming",
752 b"%d incoming changes - new heads: %s\n",
751 b"%d incoming changes - new heads: %s\n",
753 len(added),
752 len(added),
754 b', '.join([hex(c[:6]) for c in newheads]),
753 b', '.join([hex(c[:6]) for c in heads_added]),
755 )
754 )
756
755
757 tr.addpostclose(
756 tr.addpostclose(
758 b'changegroup-runhooks-%020i' % clstart,
757 b'changegroup-runhooks-%020i' % clstart,
759 lambda tr: repo._afterlock(runhooks),
758 lambda tr: repo._afterlock(runhooks),
760 )
759 )
761 if debug_info is not None:
760 if debug_info is not None:
762 display_unbundle_debug_info(repo.ui, debug_info)
761 display_unbundle_debug_info(repo.ui, debug_info)
763 finally:
762 finally:
764 repo.ui.flush()
763 repo.ui.flush()
765 # never return 0 here:
764 # never return 0 here:
766 if deltaheads < 0:
765 if deltaheads < 0:
767 ret = deltaheads - 1
766 ret = deltaheads - 1
768 else:
767 else:
769 ret = deltaheads + 1
768 ret = deltaheads + 1
770 return ret
769 return ret
771
770
772 def deltaiter(self):
771 def deltaiter(self):
773 """
772 """
774 returns an iterator of the deltas in this changegroup
773 returns an iterator of the deltas in this changegroup
775
774
776 Useful for passing to the underlying storage system to be stored.
775 Useful for passing to the underlying storage system to be stored.
777 """
776 """
778 chain = None
777 chain = None
779 for chunkdata in iter(lambda: self.deltachunk(chain), {}):
778 for chunkdata in iter(lambda: self.deltachunk(chain), {}):
780 # Chunkdata: (node, p1, p2, cs, deltabase, delta, flags, sidedata, proto_flags)
779 # Chunkdata: (node, p1, p2, cs, deltabase, delta, flags, sidedata, proto_flags)
781 yield chunkdata[:8]
780 yield chunkdata[:8]
782 chain = chunkdata[0]
781 chain = chunkdata[0]
783
782
784
783
785 class cg2unpacker(cg1unpacker):
784 class cg2unpacker(cg1unpacker):
786 """Unpacker for cg2 streams.
785 """Unpacker for cg2 streams.
787
786
788 cg2 streams add support for generaldelta, so the delta header
787 cg2 streams add support for generaldelta, so the delta header
789 format is slightly different. All other features about the data
788 format is slightly different. All other features about the data
790 remain the same.
789 remain the same.
791 """
790 """
792
791
793 deltaheader = _CHANGEGROUPV2_DELTA_HEADER
792 deltaheader = _CHANGEGROUPV2_DELTA_HEADER
794 deltaheadersize = deltaheader.size
793 deltaheadersize = deltaheader.size
795 version = b'02'
794 version = b'02'
796
795
797 def _deltaheader(self, headertuple, prevnode):
796 def _deltaheader(self, headertuple, prevnode):
798 node, p1, p2, deltabase, cs = headertuple
797 node, p1, p2, deltabase, cs = headertuple
799 flags = 0
798 flags = 0
800 protocol_flags = 0
799 protocol_flags = 0
801 return node, p1, p2, deltabase, cs, flags, protocol_flags
800 return node, p1, p2, deltabase, cs, flags, protocol_flags
802
801
803
802
804 class cg3unpacker(cg2unpacker):
803 class cg3unpacker(cg2unpacker):
805 """Unpacker for cg3 streams.
804 """Unpacker for cg3 streams.
806
805
807 cg3 streams add support for exchanging treemanifests and revlog
806 cg3 streams add support for exchanging treemanifests and revlog
808 flags. It adds the revlog flags to the delta header and an empty chunk
807 flags. It adds the revlog flags to the delta header and an empty chunk
809 separating manifests and files.
808 separating manifests and files.
810 """
809 """
811
810
812 deltaheader = _CHANGEGROUPV3_DELTA_HEADER
811 deltaheader = _CHANGEGROUPV3_DELTA_HEADER
813 deltaheadersize = deltaheader.size
812 deltaheadersize = deltaheader.size
814 version = b'03'
813 version = b'03'
815 _grouplistcount = 2 # One list of manifests and one list of files
814 _grouplistcount = 2 # One list of manifests and one list of files
816
815
817 def _deltaheader(self, headertuple, prevnode):
816 def _deltaheader(self, headertuple, prevnode):
818 node, p1, p2, deltabase, cs, flags = headertuple
817 node, p1, p2, deltabase, cs, flags = headertuple
819 protocol_flags = 0
818 protocol_flags = 0
820 return node, p1, p2, deltabase, cs, flags, protocol_flags
819 return node, p1, p2, deltabase, cs, flags, protocol_flags
821
820
822 def _unpackmanifests(
821 def _unpackmanifests(
823 self,
822 self,
824 repo,
823 repo,
825 revmap,
824 revmap,
826 trp,
825 trp,
827 prog,
826 prog,
828 addrevisioncb=None,
827 addrevisioncb=None,
829 debug_info=None,
828 debug_info=None,
830 delta_base_reuse_policy=None,
829 delta_base_reuse_policy=None,
831 ):
830 ):
832 super(cg3unpacker, self)._unpackmanifests(
831 super(cg3unpacker, self)._unpackmanifests(
833 repo,
832 repo,
834 revmap,
833 revmap,
835 trp,
834 trp,
836 prog,
835 prog,
837 addrevisioncb=addrevisioncb,
836 addrevisioncb=addrevisioncb,
838 debug_info=debug_info,
837 debug_info=debug_info,
839 delta_base_reuse_policy=delta_base_reuse_policy,
838 delta_base_reuse_policy=delta_base_reuse_policy,
840 )
839 )
841 for chunkdata in iter(self.filelogheader, {}):
840 for chunkdata in iter(self.filelogheader, {}):
842 # If we get here, there are directory manifests in the changegroup
841 # If we get here, there are directory manifests in the changegroup
843 d = chunkdata[b"filename"]
842 d = chunkdata[b"filename"]
844 repo.ui.debug(b"adding %s revisions\n" % d)
843 repo.ui.debug(b"adding %s revisions\n" % d)
845 deltas = self.deltaiter()
844 deltas = self.deltaiter()
846 if not repo.manifestlog.getstorage(d).addgroup(
845 if not repo.manifestlog.getstorage(d).addgroup(
847 deltas,
846 deltas,
848 revmap,
847 revmap,
849 trp,
848 trp,
850 addrevisioncb=addrevisioncb,
849 addrevisioncb=addrevisioncb,
851 debug_info=debug_info,
850 debug_info=debug_info,
852 delta_base_reuse_policy=delta_base_reuse_policy,
851 delta_base_reuse_policy=delta_base_reuse_policy,
853 ):
852 ):
854 raise error.Abort(_(b"received dir revlog group is empty"))
853 raise error.Abort(_(b"received dir revlog group is empty"))
855
854
856
855
857 class cg4unpacker(cg3unpacker):
856 class cg4unpacker(cg3unpacker):
858 """Unpacker for cg4 streams.
857 """Unpacker for cg4 streams.
859
858
860 cg4 streams add support for exchanging sidedata.
859 cg4 streams add support for exchanging sidedata.
861 """
860 """
862
861
863 deltaheader = _CHANGEGROUPV4_DELTA_HEADER
862 deltaheader = _CHANGEGROUPV4_DELTA_HEADER
864 deltaheadersize = deltaheader.size
863 deltaheadersize = deltaheader.size
865 version = b'04'
864 version = b'04'
866
865
867 def _deltaheader(self, headertuple, prevnode):
866 def _deltaheader(self, headertuple, prevnode):
868 protocol_flags, node, p1, p2, deltabase, cs, flags = headertuple
867 protocol_flags, node, p1, p2, deltabase, cs, flags = headertuple
869 return node, p1, p2, deltabase, cs, flags, protocol_flags
868 return node, p1, p2, deltabase, cs, flags, protocol_flags
870
869
871 def deltachunk(self, prevnode):
870 def deltachunk(self, prevnode):
872 res = super(cg4unpacker, self).deltachunk(prevnode)
871 res = super(cg4unpacker, self).deltachunk(prevnode)
873 if not res:
872 if not res:
874 return res
873 return res
875
874
876 (
875 (
877 node,
876 node,
878 p1,
877 p1,
879 p2,
878 p2,
880 cs,
879 cs,
881 deltabase,
880 deltabase,
882 delta,
881 delta,
883 flags,
882 flags,
884 sidedata,
883 sidedata,
885 protocol_flags,
884 protocol_flags,
886 ) = res
885 ) = res
887 assert not sidedata
886 assert not sidedata
888
887
889 sidedata = {}
888 sidedata = {}
890 if protocol_flags & storageutil.CG_FLAG_SIDEDATA:
889 if protocol_flags & storageutil.CG_FLAG_SIDEDATA:
891 sidedata_raw = getchunk(self._stream)
890 sidedata_raw = getchunk(self._stream)
892 sidedata = sidedatamod.deserialize_sidedata(sidedata_raw)
891 sidedata = sidedatamod.deserialize_sidedata(sidedata_raw)
893
892
894 return (
893 return (
895 node,
894 node,
896 p1,
895 p1,
897 p2,
896 p2,
898 cs,
897 cs,
899 deltabase,
898 deltabase,
900 delta,
899 delta,
901 flags,
900 flags,
902 sidedata,
901 sidedata,
903 protocol_flags,
902 protocol_flags,
904 )
903 )
905
904
906
905
907 class headerlessfixup:
906 class headerlessfixup:
908 def __init__(self, fh, h):
907 def __init__(self, fh, h):
909 self._h = h
908 self._h = h
910 self._fh = fh
909 self._fh = fh
911
910
912 def read(self, n):
911 def read(self, n):
913 if self._h:
912 if self._h:
914 d, self._h = self._h[:n], self._h[n:]
913 d, self._h = self._h[:n], self._h[n:]
915 if len(d) < n:
914 if len(d) < n:
916 d += readexactly(self._fh, n - len(d))
915 d += readexactly(self._fh, n - len(d))
917 return d
916 return d
918 return readexactly(self._fh, n)
917 return readexactly(self._fh, n)
919
918
920
919
921 def _revisiondeltatochunks(repo, delta, headerfn):
920 def _revisiondeltatochunks(repo, delta, headerfn):
922 """Serialize a revisiondelta to changegroup chunks."""
921 """Serialize a revisiondelta to changegroup chunks."""
923
922
924 # The captured revision delta may be encoded as a delta against
923 # The captured revision delta may be encoded as a delta against
925 # a base revision or as a full revision. The changegroup format
924 # a base revision or as a full revision. The changegroup format
926 # requires that everything on the wire be deltas. So for full
925 # requires that everything on the wire be deltas. So for full
927 # revisions, we need to invent a header that says to rewrite
926 # revisions, we need to invent a header that says to rewrite
928 # data.
927 # data.
929
928
930 if delta.delta is not None:
929 if delta.delta is not None:
931 prefix, data = b'', delta.delta
930 prefix, data = b'', delta.delta
932 elif delta.basenode == repo.nullid:
931 elif delta.basenode == repo.nullid:
933 data = delta.revision
932 data = delta.revision
934 prefix = mdiff.trivialdiffheader(len(data))
933 prefix = mdiff.trivialdiffheader(len(data))
935 else:
934 else:
936 data = delta.revision
935 data = delta.revision
937 prefix = mdiff.replacediffheader(delta.baserevisionsize, len(data))
936 prefix = mdiff.replacediffheader(delta.baserevisionsize, len(data))
938
937
939 meta = headerfn(delta)
938 meta = headerfn(delta)
940
939
941 yield chunkheader(len(meta) + len(prefix) + len(data))
940 yield chunkheader(len(meta) + len(prefix) + len(data))
942 yield meta
941 yield meta
943 if prefix:
942 if prefix:
944 yield prefix
943 yield prefix
945 yield data
944 yield data
946
945
947 if delta.protocol_flags & storageutil.CG_FLAG_SIDEDATA:
946 if delta.protocol_flags & storageutil.CG_FLAG_SIDEDATA:
948 # Need a separate chunk for sidedata to be able to differentiate
947 # Need a separate chunk for sidedata to be able to differentiate
949 # "raw delta" length and sidedata length
948 # "raw delta" length and sidedata length
950 sidedata = delta.sidedata
949 sidedata = delta.sidedata
951 yield chunkheader(len(sidedata))
950 yield chunkheader(len(sidedata))
952 yield sidedata
951 yield sidedata
953
952
954
953
955 def _sortnodesellipsis(store, nodes, cl, lookup):
954 def _sortnodesellipsis(store, nodes, cl, lookup):
956 """Sort nodes for changegroup generation."""
955 """Sort nodes for changegroup generation."""
957 # Ellipses serving mode.
956 # Ellipses serving mode.
958 #
957 #
959 # In a perfect world, we'd generate better ellipsis-ified graphs
958 # In a perfect world, we'd generate better ellipsis-ified graphs
960 # for non-changelog revlogs. In practice, we haven't started doing
959 # for non-changelog revlogs. In practice, we haven't started doing
961 # that yet, so the resulting DAGs for the manifestlog and filelogs
960 # that yet, so the resulting DAGs for the manifestlog and filelogs
962 # are actually full of bogus parentage on all the ellipsis
961 # are actually full of bogus parentage on all the ellipsis
963 # nodes. This has the side effect that, while the contents are
962 # nodes. This has the side effect that, while the contents are
964 # correct, the individual DAGs might be completely out of whack in
963 # correct, the individual DAGs might be completely out of whack in
965 # a case like 882681bc3166 and its ancestors (back about 10
964 # a case like 882681bc3166 and its ancestors (back about 10
966 # revisions or so) in the main hg repo.
965 # revisions or so) in the main hg repo.
967 #
966 #
968 # The one invariant we *know* holds is that the new (potentially
967 # The one invariant we *know* holds is that the new (potentially
969 # bogus) DAG shape will be valid if we order the nodes in the
968 # bogus) DAG shape will be valid if we order the nodes in the
970 # order that they're introduced in dramatis personae by the
969 # order that they're introduced in dramatis personae by the
971 # changelog, so what we do is we sort the non-changelog histories
970 # changelog, so what we do is we sort the non-changelog histories
972 # by the order in which they are used by the changelog.
971 # by the order in which they are used by the changelog.
973 key = lambda n: cl.rev(lookup(n))
972 key = lambda n: cl.rev(lookup(n))
974 return sorted(nodes, key=key)
973 return sorted(nodes, key=key)
975
974
976
975
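# Toy illustration of the ordering above, using plain dictionaries instead of
# a changelog and a real lookup callback; the node and revision values are
# made up. The point is only that store nodes are emitted in the order their
# introducing changesets appear in the changelog.
_toy_clrev = {b'c0': 0, b'c1': 1, b'c2': 2}
_toy_lookup = {b'fa': b'c2', b'fb': b'c0', b'fc': b'c1'}
assert sorted(
    [b'fa', b'fb', b'fc'], key=lambda n: _toy_clrev[_toy_lookup[n]]
) == [b'fb', b'fc', b'fa']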
977 def _resolvenarrowrevisioninfo(
976 def _resolvenarrowrevisioninfo(
978 cl,
977 cl,
979 store,
978 store,
980 ischangelog,
979 ischangelog,
981 rev,
980 rev,
982 linkrev,
981 linkrev,
983 linknode,
982 linknode,
984 clrevtolocalrev,
983 clrevtolocalrev,
985 fullclnodes,
984 fullclnodes,
986 precomputedellipsis,
985 precomputedellipsis,
987 ):
986 ):
988 linkparents = precomputedellipsis[linkrev]
987 linkparents = precomputedellipsis[linkrev]
989
988
990 def local(clrev):
989 def local(clrev):
991 """Turn a changelog revnum into a local revnum.
990 """Turn a changelog revnum into a local revnum.
992
991
993 The ellipsis dag is stored as revnums on the changelog,
992 The ellipsis dag is stored as revnums on the changelog,
994 but when we're producing ellipsis entries for
993 but when we're producing ellipsis entries for
995 non-changelog revlogs, we need to turn those numbers into
994 non-changelog revlogs, we need to turn those numbers into
996 something local. This does that for us, and during the
995 something local. This does that for us, and during the
997 changelog sending phase will also expand the stored
996 changelog sending phase will also expand the stored
998 mappings as needed.
997 mappings as needed.
999 """
998 """
1000 if clrev == nullrev:
999 if clrev == nullrev:
1001 return nullrev
1000 return nullrev
1002
1001
1003 if ischangelog:
1002 if ischangelog:
1004 return clrev
1003 return clrev
1005
1004
1006 # Walk the ellipsis-ized changelog breadth-first looking for a
1005 # Walk the ellipsis-ized changelog breadth-first looking for a
1007 # change that has been linked from the current revlog.
1006 # change that has been linked from the current revlog.
1008 #
1007 #
1009 # For a flat manifest revlog only a single step should be necessary
1008 # For a flat manifest revlog only a single step should be necessary
1010 # as every relevant changelog entry also touches the flat
1009 # as every relevant changelog entry also touches the flat
1011 # manifest.
1010 # manifest.
1012 #
1011 #
1013 # For a filelog or tree manifest dirlog however not every changelog
1012 # For a filelog or tree manifest dirlog however not every changelog
1014 # entry will have been relevant, so we need to skip some changelog
1013 # entry will have been relevant, so we need to skip some changelog
1015 # nodes even after ellipsis-izing.
1014 # nodes even after ellipsis-izing.
1016 walk = [clrev]
1015 walk = [clrev]
1017 while walk:
1016 while walk:
1018 p = walk[0]
1017 p = walk[0]
1019 walk = walk[1:]
1018 walk = walk[1:]
1020 if p in clrevtolocalrev:
1019 if p in clrevtolocalrev:
1021 return clrevtolocalrev[p]
1020 return clrevtolocalrev[p]
1022 elif p in fullclnodes:
1021 elif p in fullclnodes:
1023 walk.extend([pp for pp in cl.parentrevs(p) if pp != nullrev])
1022 walk.extend([pp for pp in cl.parentrevs(p) if pp != nullrev])
1024 elif p in precomputedellipsis:
1023 elif p in precomputedellipsis:
1025 walk.extend(
1024 walk.extend(
1026 [pp for pp in precomputedellipsis[p] if pp != nullrev]
1025 [pp for pp in precomputedellipsis[p] if pp != nullrev]
1027 )
1026 )
1028 else:
1027 else:
1029 # In this case, we've got an ellipsis with parents
1028 # In this case, we've got an ellipsis with parents
1030 # outside the current bundle (likely an
1029 # outside the current bundle (likely an
1031 # incremental pull). We "know" that we can use the
1030 # incremental pull). We "know" that we can use the
1032 # value of this same revlog at whatever revision
1031 # value of this same revlog at whatever revision
1033 # is pointed to by linknode. "Know" is in scare
1032 # is pointed to by linknode. "Know" is in scare
1034 # quotes because I haven't done enough examination
1033 # quotes because I haven't done enough examination
1035 # of edge cases to convince myself this is really
1034 # of edge cases to convince myself this is really
1036 # a fact - it works for all the (admittedly
1035 # a fact - it works for all the (admittedly
1037 # thorough) cases in our testsuite, but I would be
1036 # thorough) cases in our testsuite, but I would be
1038 # somewhat unsurprised to find a case in the wild
1037 # somewhat unsurprised to find a case in the wild
1039 # where this breaks down a bit. That said, I don't
1038 # where this breaks down a bit. That said, I don't
1040 # know if it would hurt anything.
1039 # know if it would hurt anything.
1041 for i in range(rev, 0, -1):
1040 for i in range(rev, 0, -1):
1042 if store.linkrev(i) == clrev:
1041 if store.linkrev(i) == clrev:
1043 return i
1042 return i
1044 # We failed to resolve a parent for this node, so
1043 # We failed to resolve a parent for this node, so
1045 # we crash the changegroup construction.
1044 # we crash the changegroup construction.
1046 if hasattr(store, 'target'):
1045 if hasattr(store, 'target'):
1047 target = store.display_id
1046 target = store.display_id
1048 else:
1047 else:
1049 # some revlog not actually a revlog
1048 # some revlog not actually a revlog
1050 target = store._revlog.display_id
1049 target = store._revlog.display_id
1051
1050
1052 raise error.Abort(
1051 raise error.Abort(
1053 b"unable to resolve parent while packing '%s' %r"
1052 b"unable to resolve parent while packing '%s' %r"
1054 b' for changeset %r' % (target, rev, clrev)
1053 b' for changeset %r' % (target, rev, clrev)
1055 )
1054 )
1056
1055
1057 return nullrev
1056 return nullrev
1058
1057
1059 if not linkparents or (store.parentrevs(rev) == (nullrev, nullrev)):
1058 if not linkparents or (store.parentrevs(rev) == (nullrev, nullrev)):
1060 p1, p2 = nullrev, nullrev
1059 p1, p2 = nullrev, nullrev
1061 elif len(linkparents) == 1:
1060 elif len(linkparents) == 1:
1062 (p1,) = sorted(local(p) for p in linkparents)
1061 (p1,) = sorted(local(p) for p in linkparents)
1063 p2 = nullrev
1062 p2 = nullrev
1064 else:
1063 else:
1065 p1, p2 = sorted(local(p) for p in linkparents)
1064 p1, p2 = sorted(local(p) for p in linkparents)
1066
1065
1067 p1node, p2node = store.node(p1), store.node(p2)
1066 p1node, p2node = store.node(p1), store.node(p2)
1068
1067
1069 return p1node, p2node, linknode
1068 return p1node, p2node, linknode
1070
1069
1071
1070
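# Simplified, self-contained illustration of the breadth-first walk performed
# by local() above: plain dictionaries stand in for the clrev-to-local
# mapping and the precomputed ellipsis parents, and the numbers are invented.
def _toy_local(clrev, clrevtolocalrev, parents):
    walk = [clrev]
    while walk:
        p = walk.pop(0)
        if p in clrevtolocalrev:
            return clrevtolocalrev[p]
        walk.extend(parents.get(p, ()))
    return nullrev

# changelog rev 5 is not linked from this revlog, but its parent 3 is
assert _toy_local(5, {3: 7}, {5: (3, 4)}) == 7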
1072 def deltagroup(
1071 def deltagroup(
1073 repo,
1072 repo,
1074 store,
1073 store,
1075 nodes,
1074 nodes,
1076 ischangelog,
1075 ischangelog,
1077 lookup,
1076 lookup,
1078 forcedeltaparentprev,
1077 forcedeltaparentprev,
1079 topic=None,
1078 topic=None,
1080 ellipses=False,
1079 ellipses=False,
1081 clrevtolocalrev=None,
1080 clrevtolocalrev=None,
1082 fullclnodes=None,
1081 fullclnodes=None,
1083 precomputedellipsis=None,
1082 precomputedellipsis=None,
1084 sidedata_helpers=None,
1083 sidedata_helpers=None,
1085 debug_info=None,
1084 debug_info=None,
1086 ):
1085 ):
1087 """Calculate deltas for a set of revisions.
1086 """Calculate deltas for a set of revisions.
1088
1087
1089 Is a generator of ``revisiondelta`` instances.
1088 Is a generator of ``revisiondelta`` instances.
1090
1089
1091 If topic is not None, progress detail will be generated using this
1090 If topic is not None, progress detail will be generated using this
1092 topic name (e.g. changesets, manifests, etc).
1091 topic name (e.g. changesets, manifests, etc).
1093
1092
1094 See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
1093 See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
1095 `sidedata_helpers`.
1094 `sidedata_helpers`.
1096 """
1095 """
1097 if not nodes:
1096 if not nodes:
1098 return
1097 return
1099
1098
1100 cl = repo.changelog
1099 cl = repo.changelog
1101
1100
1102 if ischangelog:
1101 if ischangelog:
1103 # `hg log` shows changesets in storage order. To preserve order
1102 # `hg log` shows changesets in storage order. To preserve order
1104 # across clones, send out changesets in storage order.
1103 # across clones, send out changesets in storage order.
1105 nodesorder = b'storage'
1104 nodesorder = b'storage'
1106 elif ellipses:
1105 elif ellipses:
1107 nodes = _sortnodesellipsis(store, nodes, cl, lookup)
1106 nodes = _sortnodesellipsis(store, nodes, cl, lookup)
1108 nodesorder = b'nodes'
1107 nodesorder = b'nodes'
1109 else:
1108 else:
1110 nodesorder = None
1109 nodesorder = None
1111
1110
1112 # Perform ellipses filtering and revision massaging. We do this before
1111 # Perform ellipses filtering and revision massaging. We do this before
1113 # emitrevisions() because a) filtering out revisions creates less work
1112 # emitrevisions() because a) filtering out revisions creates less work
1114 # for emitrevisions() b) dropping revisions would break emitrevisions()'s
1113 # for emitrevisions() b) dropping revisions would break emitrevisions()'s
1115 # assumptions about delta choices and we would possibly send a delta
1114 # assumptions about delta choices and we would possibly send a delta
1116 # referencing a missing base revision.
1115 # referencing a missing base revision.
1117 #
1116 #
1118 # Also, calling lookup() has side-effects with regards to populating
1117 # Also, calling lookup() has side-effects with regards to populating
1119 # data structures. If we don't call lookup() for each node or if we call
1118 # data structures. If we don't call lookup() for each node or if we call
1120 # lookup() after the first pass through each node, things can break -
1119 # lookup() after the first pass through each node, things can break -
1121 # possibly intermittently depending on the python hash seed! For that
1120 # possibly intermittently depending on the python hash seed! For that
1122 # reason, we store a mapping of all linknodes during the initial node
1121 # reason, we store a mapping of all linknodes during the initial node
1123 # pass rather than use lookup() on the output side.
1122 # pass rather than use lookup() on the output side.
1124 if ellipses:
1123 if ellipses:
1125 filtered = []
1124 filtered = []
1126 adjustedparents = {}
1125 adjustedparents = {}
1127 linknodes = {}
1126 linknodes = {}
1128
1127
1129 for node in nodes:
1128 for node in nodes:
1130 rev = store.rev(node)
1129 rev = store.rev(node)
1131 linknode = lookup(node)
1130 linknode = lookup(node)
1132 linkrev = cl.rev(linknode)
1131 linkrev = cl.rev(linknode)
1133 clrevtolocalrev[linkrev] = rev
1132 clrevtolocalrev[linkrev] = rev
1134
1133
1135 # If linknode is in fullclnodes, it means the corresponding
1134 # If linknode is in fullclnodes, it means the corresponding
1136 # changeset was a full changeset and is being sent unaltered.
1135 # changeset was a full changeset and is being sent unaltered.
1137 if linknode in fullclnodes:
1136 if linknode in fullclnodes:
1138 linknodes[node] = linknode
1137 linknodes[node] = linknode
1139
1138
1140 # If the corresponding changeset wasn't in the set computed
1139 # If the corresponding changeset wasn't in the set computed
1141 # as relevant to us, it should be dropped outright.
1140 # as relevant to us, it should be dropped outright.
1142 elif linkrev not in precomputedellipsis:
1141 elif linkrev not in precomputedellipsis:
1143 continue
1142 continue
1144
1143
1145 else:
1144 else:
1146 # We could probably do this later and avoid the dict
1145 # We could probably do this later and avoid the dict
1147 # holding state. But it likely doesn't matter.
1146 # holding state. But it likely doesn't matter.
1148 p1node, p2node, linknode = _resolvenarrowrevisioninfo(
1147 p1node, p2node, linknode = _resolvenarrowrevisioninfo(
1149 cl,
1148 cl,
1150 store,
1149 store,
1151 ischangelog,
1150 ischangelog,
1152 rev,
1151 rev,
1153 linkrev,
1152 linkrev,
1154 linknode,
1153 linknode,
1155 clrevtolocalrev,
1154 clrevtolocalrev,
1156 fullclnodes,
1155 fullclnodes,
1157 precomputedellipsis,
1156 precomputedellipsis,
1158 )
1157 )
1159
1158
1160 adjustedparents[node] = (p1node, p2node)
1159 adjustedparents[node] = (p1node, p2node)
1161 linknodes[node] = linknode
1160 linknodes[node] = linknode
1162
1161
1163 filtered.append(node)
1162 filtered.append(node)
1164
1163
1165 nodes = filtered
1164 nodes = filtered
1166
1165
1167 # We expect the first pass to be fast, so we only engage the progress
1166 # We expect the first pass to be fast, so we only engage the progress
1168 # meter for constructing the revision deltas.
1167 # meter for constructing the revision deltas.
1169 progress = None
1168 progress = None
1170 if topic is not None:
1169 if topic is not None:
1171 progress = repo.ui.makeprogress(
1170 progress = repo.ui.makeprogress(
1172 topic, unit=_(b'chunks'), total=len(nodes)
1171 topic, unit=_(b'chunks'), total=len(nodes)
1173 )
1172 )
1174
1173
1175 configtarget = repo.ui.config(b'devel', b'bundle.delta')
1174 configtarget = repo.ui.config(b'devel', b'bundle.delta')
1176 if configtarget not in (b'', b'p1', b'full'):
1175 if configtarget not in (b'', b'p1', b'full'):
1177 msg = _(b"""config "devel.bundle.delta" has unknown value: %s""")
1176 msg = _(b"""config "devel.bundle.delta" has unknown value: %s""")
1178 repo.ui.warn(msg % configtarget)
1177 repo.ui.warn(msg % configtarget)
1179
1178
1180 deltamode = repository.CG_DELTAMODE_STD
1179 deltamode = repository.CG_DELTAMODE_STD
1181 if forcedeltaparentprev:
1180 if forcedeltaparentprev:
1182 deltamode = repository.CG_DELTAMODE_PREV
1181 deltamode = repository.CG_DELTAMODE_PREV
1183 elif configtarget == b'p1':
1182 elif configtarget == b'p1':
1184 deltamode = repository.CG_DELTAMODE_P1
1183 deltamode = repository.CG_DELTAMODE_P1
1185 elif configtarget == b'full':
1184 elif configtarget == b'full':
1186 deltamode = repository.CG_DELTAMODE_FULL
1185 deltamode = repository.CG_DELTAMODE_FULL
1187
1186
1188 revisions = store.emitrevisions(
1187 revisions = store.emitrevisions(
1189 nodes,
1188 nodes,
1190 nodesorder=nodesorder,
1189 nodesorder=nodesorder,
1191 revisiondata=True,
1190 revisiondata=True,
1192 assumehaveparentrevisions=not ellipses,
1191 assumehaveparentrevisions=not ellipses,
1193 deltamode=deltamode,
1192 deltamode=deltamode,
1194 sidedata_helpers=sidedata_helpers,
1193 sidedata_helpers=sidedata_helpers,
1195 debug_info=debug_info,
1194 debug_info=debug_info,
1196 )
1195 )
1197
1196
1198 for i, revision in enumerate(revisions):
1197 for i, revision in enumerate(revisions):
1199 if progress:
1198 if progress:
1200 progress.update(i + 1)
1199 progress.update(i + 1)
1201
1200
1202 if ellipses:
1201 if ellipses:
1203 linknode = linknodes[revision.node]
1202 linknode = linknodes[revision.node]
1204
1203
1205 if revision.node in adjustedparents:
1204 if revision.node in adjustedparents:
1206 p1node, p2node = adjustedparents[revision.node]
1205 p1node, p2node = adjustedparents[revision.node]
1207 revision.p1node = p1node
1206 revision.p1node = p1node
1208 revision.p2node = p2node
1207 revision.p2node = p2node
1209 revision.flags |= repository.REVISION_FLAG_ELLIPSIS
1208 revision.flags |= repository.REVISION_FLAG_ELLIPSIS
1210
1209
1211 else:
1210 else:
1212 linknode = lookup(revision.node)
1211 linknode = lookup(revision.node)
1213
1212
1214 revision.linknode = linknode
1213 revision.linknode = linknode
1215 yield revision
1214 yield revision
1216
1215
1217 if progress:
1216 if progress:
1218 progress.complete()
1217 progress.complete()
1219
1218
1220
1219
1221 def make_debug_info():
1220 def make_debug_info():
1222 """ "build a "new" debug_info dictionnary
1221 """ "build a "new" debug_info dictionnary
1223
1222
1224 That dictionary can be used to gather information about the bundle process
1223 That dictionary can be used to gather information about the bundle process
1225 """
1224 """
1226 return {
1225 return {
1227 'revision-total': 0,
1226 'revision-total': 0,
1228 'revision-changelog': 0,
1227 'revision-changelog': 0,
1229 'revision-manifest': 0,
1228 'revision-manifest': 0,
1230 'revision-files': 0,
1229 'revision-files': 0,
1231 'file-count': 0,
1230 'file-count': 0,
1232 'merge-total': 0,
1231 'merge-total': 0,
1233 'available-delta': 0,
1232 'available-delta': 0,
1234 'available-full': 0,
1233 'available-full': 0,
1235 'delta-against-prev': 0,
1234 'delta-against-prev': 0,
1236 'delta-full': 0,
1235 'delta-full': 0,
1237 'delta-against-p1': 0,
1236 'delta-against-p1': 0,
1238 'denied-delta-candeltafn': 0,
1237 'denied-delta-candeltafn': 0,
1239 'denied-base-not-available': 0,
1238 'denied-base-not-available': 0,
1240 'reused-storage-delta': 0,
1239 'reused-storage-delta': 0,
1241 'computed-delta': 0,
1240 'computed-delta': 0,
1242 }
1241 }
1243
1242
1244
1243
1245 def merge_debug_info(base, other):
1244 def merge_debug_info(base, other):
1246 """merge the debug information from <other> into <base>
1245 """merge the debug information from <other> into <base>
1247
1246
1248 This function can be used to aggregate lower-level counters into higher-level summaries.
1247 This function can be used to aggregate lower-level counters into higher-level summaries.
1249 """
1248 """
1250 for key in (
1249 for key in (
1251 'revision-total',
1250 'revision-total',
1252 'revision-changelog',
1251 'revision-changelog',
1253 'revision-manifest',
1252 'revision-manifest',
1254 'revision-files',
1253 'revision-files',
1255 'merge-total',
1254 'merge-total',
1256 'available-delta',
1255 'available-delta',
1257 'available-full',
1256 'available-full',
1258 'delta-against-prev',
1257 'delta-against-prev',
1259 'delta-full',
1258 'delta-full',
1260 'delta-against-p1',
1259 'delta-against-p1',
1261 'denied-delta-candeltafn',
1260 'denied-delta-candeltafn',
1262 'denied-base-not-available',
1261 'denied-base-not-available',
1263 'reused-storage-delta',
1262 'reused-storage-delta',
1264 'computed-delta',
1263 'computed-delta',
1265 ):
1264 ):
1266 base[key] += other[key]
1265 base[key] += other[key]
1267
1266
1268
1267
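# Hedged usage sketch for the two helpers above: generate() below creates one
# such dictionary per area (changelog, manifests, files) and folds them into
# an overall one. The counter bumps here are invented purely for illustration.
_overall = make_debug_info()
_per_store = make_debug_info()
_per_store['revision-total'] += 2
_per_store['reused-storage-delta'] += 1
merge_debug_info(_overall, _per_store)
assert _overall['revision-total'] == 2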
1269 _KEY_PART_WIDTH = 17
1268 _KEY_PART_WIDTH = 17
1270
1269
1271
1270
1272 def _dbg_bdl_line(
1271 def _dbg_bdl_line(
1273 ui,
1272 ui,
1274 indent,
1273 indent,
1275 key,
1274 key,
1276 base_value=None,
1275 base_value=None,
1277 percentage_base=None,
1276 percentage_base=None,
1278 percentage_key=None,
1277 percentage_key=None,
1279 percentage_ref=None,
1278 percentage_ref=None,
1280 extra=None,
1279 extra=None,
1281 ):
1280 ):
1282 """Print one line of debug_bundle_debug_info"""
1281 """Print one line of debug_bundle_debug_info"""
1283 line = b"DEBUG-BUNDLING: "
1282 line = b"DEBUG-BUNDLING: "
1284 line += b' ' * (2 * indent)
1283 line += b' ' * (2 * indent)
1285 key += b":"
1284 key += b":"
1286 if base_value is not None:
1285 if base_value is not None:
1287 assert len(key) + 1 + (2 * indent) <= _KEY_PART_WIDTH
1286 assert len(key) + 1 + (2 * indent) <= _KEY_PART_WIDTH
1288 line += key.ljust(_KEY_PART_WIDTH - (2 * indent))
1287 line += key.ljust(_KEY_PART_WIDTH - (2 * indent))
1289 line += b"%10d" % base_value
1288 line += b"%10d" % base_value
1290 else:
1289 else:
1291 line += key
1290 line += key
1292
1291
1293 if percentage_base is not None:
1292 if percentage_base is not None:
1294 assert base_value is not None
1293 assert base_value is not None
1295 percentage = base_value * 100 // percentage_base
1294 percentage = base_value * 100 // percentage_base
1296 if percentage_key is not None:
1295 if percentage_key is not None:
1297 line += b" (%d%% of %s %d)" % (
1296 line += b" (%d%% of %s %d)" % (
1298 percentage,
1297 percentage,
1299 percentage_key,
1298 percentage_key,
1300 percentage_ref,
1299 percentage_ref,
1301 )
1300 )
1302 else:
1301 else:
1303 line += b" (%d%%)" % percentage
1302 line += b" (%d%%)" % percentage
1304
1303
1305 if extra:
1304 if extra:
1306 line += b" "
1305 line += b" "
1307 line += extra
1306 line += extra
1308
1307
1309 line += b'\n'
1308 line += b'\n'
1310 ui.write_err(line)
1309 ui.write_err(line)
1311
1310
1312
1311
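# Hedged example of a single output line: anything exposing a write_err()
# method can stand in for the ui object here; the numbers are invented.
class _EchoUI:
    def write_err(self, data):
        print(data.decode('ascii'), end='')

_dbg_bdl_line(_EchoUI(), 1, b'changelog', 12, percentage_base=40)
# prints roughly: DEBUG-BUNDLING:   changelog:             12 (30%)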
1313 def display_bundling_debug_info(
1312 def display_bundling_debug_info(
1314 ui,
1313 ui,
1315 debug_info,
1314 debug_info,
1316 cl_debug_info,
1315 cl_debug_info,
1317 mn_debug_info,
1316 mn_debug_info,
1318 fl_debug_info,
1317 fl_debug_info,
1319 ):
1318 ):
1320 """display debug information gathered during a bundling through `ui`"""
1319 """display debug information gathered during a bundling through `ui`"""
1321 d = debug_info
1320 d = debug_info
1322 c = cl_debug_info
1321 c = cl_debug_info
1323 m = mn_debug_info
1322 m = mn_debug_info
1324 f = fl_debug_info
1323 f = fl_debug_info
1325 all_info = [
1324 all_info = [
1326 (b"changelog", b"cl", c),
1325 (b"changelog", b"cl", c),
1327 (b"manifests", b"mn", m),
1326 (b"manifests", b"mn", m),
1328 (b"files", b"fl", f),
1327 (b"files", b"fl", f),
1329 ]
1328 ]
1330 _dbg_bdl_line(ui, 0, b'revisions', d['revision-total'])
1329 _dbg_bdl_line(ui, 0, b'revisions', d['revision-total'])
1331 _dbg_bdl_line(ui, 1, b'changelog', d['revision-changelog'])
1330 _dbg_bdl_line(ui, 1, b'changelog', d['revision-changelog'])
1332 _dbg_bdl_line(ui, 1, b'manifest', d['revision-manifest'])
1331 _dbg_bdl_line(ui, 1, b'manifest', d['revision-manifest'])
1333 extra = b'(for %d revlogs)' % d['file-count']
1332 extra = b'(for %d revlogs)' % d['file-count']
1334 _dbg_bdl_line(ui, 1, b'files', d['revision-files'], extra=extra)
1333 _dbg_bdl_line(ui, 1, b'files', d['revision-files'], extra=extra)
1335 if d['merge-total']:
1334 if d['merge-total']:
1336 _dbg_bdl_line(ui, 1, b'merge', d['merge-total'], d['revision-total'])
1335 _dbg_bdl_line(ui, 1, b'merge', d['merge-total'], d['revision-total'])
1337 for k, __, v in all_info:
1336 for k, __, v in all_info:
1338 if v['merge-total']:
1337 if v['merge-total']:
1339 _dbg_bdl_line(ui, 2, k, v['merge-total'], v['revision-total'])
1338 _dbg_bdl_line(ui, 2, k, v['merge-total'], v['revision-total'])
1340
1339
1341 _dbg_bdl_line(ui, 0, b'deltas')
1340 _dbg_bdl_line(ui, 0, b'deltas')
1342 _dbg_bdl_line(
1341 _dbg_bdl_line(
1343 ui,
1342 ui,
1344 1,
1343 1,
1345 b'from-storage',
1344 b'from-storage',
1346 d['reused-storage-delta'],
1345 d['reused-storage-delta'],
1347 percentage_base=d['available-delta'],
1346 percentage_base=d['available-delta'],
1348 percentage_key=b"available",
1347 percentage_key=b"available",
1349 percentage_ref=d['available-delta'],
1348 percentage_ref=d['available-delta'],
1350 )
1349 )
1351
1350
1352 if d['denied-delta-candeltafn']:
1351 if d['denied-delta-candeltafn']:
1353 _dbg_bdl_line(ui, 2, b'denied-fn', d['denied-delta-candeltafn'])
1352 _dbg_bdl_line(ui, 2, b'denied-fn', d['denied-delta-candeltafn'])
1354 for __, k, v in all_info:
1353 for __, k, v in all_info:
1355 if v['denied-delta-candeltafn']:
1354 if v['denied-delta-candeltafn']:
1356 _dbg_bdl_line(ui, 3, k, v['denied-delta-candeltafn'])
1355 _dbg_bdl_line(ui, 3, k, v['denied-delta-candeltafn'])
1357
1356
1358 if d['denied-base-not-available']:
1357 if d['denied-base-not-available']:
1359 _dbg_bdl_line(ui, 2, b'denied-nb', d['denied-base-not-available'])
1358 _dbg_bdl_line(ui, 2, b'denied-nb', d['denied-base-not-available'])
1360 for k, __, v in all_info:
1359 for k, __, v in all_info:
1361 if v['denied-base-not-available']:
1360 if v['denied-base-not-available']:
1362 _dbg_bdl_line(ui, 3, k, v['denied-base-not-available'])
1361 _dbg_bdl_line(ui, 3, k, v['denied-base-not-available'])
1363
1362
1364 if d['computed-delta']:
1363 if d['computed-delta']:
1365 _dbg_bdl_line(ui, 1, b'computed', d['computed-delta'])
1364 _dbg_bdl_line(ui, 1, b'computed', d['computed-delta'])
1366
1365
1367 if d['available-full']:
1366 if d['available-full']:
1368 _dbg_bdl_line(
1367 _dbg_bdl_line(
1369 ui,
1368 ui,
1370 2,
1369 2,
1371 b'full',
1370 b'full',
1372 d['delta-full'],
1371 d['delta-full'],
1373 percentage_base=d['available-full'],
1372 percentage_base=d['available-full'],
1374 percentage_key=b"native",
1373 percentage_key=b"native",
1375 percentage_ref=d['available-full'],
1374 percentage_ref=d['available-full'],
1376 )
1375 )
1377 for k, __, v in all_info:
1376 for k, __, v in all_info:
1378 if v['available-full']:
1377 if v['available-full']:
1379 _dbg_bdl_line(
1378 _dbg_bdl_line(
1380 ui,
1379 ui,
1381 3,
1380 3,
1382 k,
1381 k,
1383 v['delta-full'],
1382 v['delta-full'],
1384 percentage_base=v['available-full'],
1383 percentage_base=v['available-full'],
1385 percentage_key=b"native",
1384 percentage_key=b"native",
1386 percentage_ref=v['available-full'],
1385 percentage_ref=v['available-full'],
1387 )
1386 )
1388
1387
1389 if d['delta-against-prev']:
1388 if d['delta-against-prev']:
1390 _dbg_bdl_line(ui, 2, b'previous', d['delta-against-prev'])
1389 _dbg_bdl_line(ui, 2, b'previous', d['delta-against-prev'])
1391 for k, __, v in all_info:
1390 for k, __, v in all_info:
1392 if v['delta-against-prev']:
1391 if v['delta-against-prev']:
1393 _dbg_bdl_line(ui, 3, k, v['delta-against-prev'])
1392 _dbg_bdl_line(ui, 3, k, v['delta-against-prev'])
1394
1393
1395 if d['delta-against-p1']:
1394 if d['delta-against-p1']:
1396 _dbg_bdl_line(ui, 2, b'parent-1', d['delta-against-p1'])
1395 _dbg_bdl_line(ui, 2, b'parent-1', d['delta-against-p1'])
1397 for k, __, v in all_info:
1396 for k, __, v in all_info:
1398 if v['delta-against-p1']:
1397 if v['delta-against-p1']:
1399 _dbg_bdl_line(ui, 3, k, v['delta-against-p1'])
1398 _dbg_bdl_line(ui, 3, k, v['delta-against-p1'])
1400
1399
1401
1400
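# Illustrative only: how the four dictionaries fit together. generate() below
# fills them while bundling; here they are mostly empty, with just enough
# non-zero counters to keep the percentage computations meaningful. _QuietUI
# is a stand-in for a real ui object.
class _QuietUI:
    def write_err(self, data):
        pass

_stats = make_debug_info()
_stats['revision-total'] = 1
_stats['available-delta'] = 1
_stats['reused-storage-delta'] = 1
display_bundling_debug_info(
    _QuietUI(),
    _stats,
    make_debug_info(),  # changelog
    make_debug_info(),  # manifests
    make_debug_info(),  # files
)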
1402 class cgpacker:
1401 class cgpacker:
1403 def __init__(
1402 def __init__(
1404 self,
1403 self,
1405 repo,
1404 repo,
1406 oldmatcher,
1405 oldmatcher,
1407 matcher,
1406 matcher,
1408 version,
1407 version,
1409 builddeltaheader,
1408 builddeltaheader,
1410 manifestsend,
1409 manifestsend,
1411 forcedeltaparentprev=False,
1410 forcedeltaparentprev=False,
1412 bundlecaps=None,
1411 bundlecaps=None,
1413 ellipses=False,
1412 ellipses=False,
1414 shallow=False,
1413 shallow=False,
1415 ellipsisroots=None,
1414 ellipsisroots=None,
1416 fullnodes=None,
1415 fullnodes=None,
1417 remote_sidedata=None,
1416 remote_sidedata=None,
1418 ):
1417 ):
1419 """Given a source repo, construct a bundler.
1418 """Given a source repo, construct a bundler.
1420
1419
1421 oldmatcher is a matcher that matches on files the client already has.
1420 oldmatcher is a matcher that matches on files the client already has.
1422 These will not be included in the changegroup.
1421 These will not be included in the changegroup.
1423
1422
1424 matcher is a matcher that matches on files to include in the
1423 matcher is a matcher that matches on files to include in the
1425 changegroup. Used to facilitate sparse changegroups.
1424 changegroup. Used to facilitate sparse changegroups.
1426
1425
1427 forcedeltaparentprev indicates whether delta parents must be against
1426 forcedeltaparentprev indicates whether delta parents must be against
1428 the previous revision in a delta group. This should only be used for
1427 the previous revision in a delta group. This should only be used for
1429 compatibility with changegroup version 1.
1428 compatibility with changegroup version 1.
1430
1429
1431 builddeltaheader is a callable that constructs the header for a group
1430 builddeltaheader is a callable that constructs the header for a group
1432 delta.
1431 delta.
1433
1432
1434 manifestsend is a chunk to send after manifests have been fully emitted.
1433 manifestsend is a chunk to send after manifests have been fully emitted.
1435
1434
1436 ellipses indicates whether ellipsis serving mode is enabled.
1435 ellipses indicates whether ellipsis serving mode is enabled.
1437
1436
1438 bundlecaps is optional and can be used to specify the set of
1437 bundlecaps is optional and can be used to specify the set of
1439 capabilities which can be used to build the bundle. While bundlecaps is
1438 capabilities which can be used to build the bundle. While bundlecaps is
1440 unused in core Mercurial, extensions rely on this feature to communicate
1439 unused in core Mercurial, extensions rely on this feature to communicate
1441 capabilities to customize the changegroup packer.
1440 capabilities to customize the changegroup packer.
1442
1441
1443 shallow indicates whether shallow data might be sent. The packer may
1442 shallow indicates whether shallow data might be sent. The packer may
1444 need to pack file contents not introduced by the changes being packed.
1443 need to pack file contents not introduced by the changes being packed.
1445
1444
1446 fullnodes is the set of changelog nodes which should not be ellipsis
1445 fullnodes is the set of changelog nodes which should not be ellipsis
1447 nodes. We store this rather than the set of nodes that should be
1446 nodes. We store this rather than the set of nodes that should be
1448 ellipsis because for very large histories we expect this to be
1447 ellipsis because for very large histories we expect this to be
1449 significantly smaller.
1448 significantly smaller.
1450
1449
1451 remote_sidedata is the set of sidedata categories wanted by the remote.
1450 remote_sidedata is the set of sidedata categories wanted by the remote.
1452 """
1451 """
1453 assert oldmatcher
1452 assert oldmatcher
1454 assert matcher
1453 assert matcher
1455 self._oldmatcher = oldmatcher
1454 self._oldmatcher = oldmatcher
1456 self._matcher = matcher
1455 self._matcher = matcher
1457
1456
1458 self.version = version
1457 self.version = version
1459 self._forcedeltaparentprev = forcedeltaparentprev
1458 self._forcedeltaparentprev = forcedeltaparentprev
1460 self._builddeltaheader = builddeltaheader
1459 self._builddeltaheader = builddeltaheader
1461 self._manifestsend = manifestsend
1460 self._manifestsend = manifestsend
1462 self._ellipses = ellipses
1461 self._ellipses = ellipses
1463
1462
1464 # Set of capabilities we can use to build the bundle.
1463 # Set of capabilities we can use to build the bundle.
1465 if bundlecaps is None:
1464 if bundlecaps is None:
1466 bundlecaps = set()
1465 bundlecaps = set()
1467 self._bundlecaps = bundlecaps
1466 self._bundlecaps = bundlecaps
1468 if remote_sidedata is None:
1467 if remote_sidedata is None:
1469 remote_sidedata = set()
1468 remote_sidedata = set()
1470 self._remote_sidedata = remote_sidedata
1469 self._remote_sidedata = remote_sidedata
1471 self._isshallow = shallow
1470 self._isshallow = shallow
1472 self._fullclnodes = fullnodes
1471 self._fullclnodes = fullnodes
1473
1472
1474 # Maps ellipsis revs to their roots at the changelog level.
1473 # Maps ellipsis revs to their roots at the changelog level.
1475 self._precomputedellipsis = ellipsisroots
1474 self._precomputedellipsis = ellipsisroots
1476
1475
1477 self._repo = repo
1476 self._repo = repo
1478
1477
1479 if self._repo.ui.verbose and not self._repo.ui.debugflag:
1478 if self._repo.ui.verbose and not self._repo.ui.debugflag:
1480 self._verbosenote = self._repo.ui.note
1479 self._verbosenote = self._repo.ui.note
1481 else:
1480 else:
1482 self._verbosenote = lambda s: None
1481 self._verbosenote = lambda s: None
1483
1482
1484 def generate(
1483 def generate(
1485 self,
1484 self,
1486 commonrevs,
1485 commonrevs,
1487 clnodes,
1486 clnodes,
1488 fastpathlinkrev,
1487 fastpathlinkrev,
1489 source,
1488 source,
1490 changelog=True,
1489 changelog=True,
1491 ):
1490 ):
1492 """Yield a sequence of changegroup byte chunks.
1491 """Yield a sequence of changegroup byte chunks.
1493 If changelog is False, changelog data won't be added to changegroup
1492 If changelog is False, changelog data won't be added to changegroup
1494 """
1493 """
1495
1494
1496 debug_info = None
1495 debug_info = None
1497 repo = self._repo
1496 repo = self._repo
1498 if repo.ui.configbool(b'debug', b'bundling-stats'):
1497 if repo.ui.configbool(b'debug', b'bundling-stats'):
1499 debug_info = make_debug_info()
1498 debug_info = make_debug_info()
1500 cl = repo.changelog
1499 cl = repo.changelog
1501
1500
1502 self._verbosenote(_(b'uncompressed size of bundle content:\n'))
1501 self._verbosenote(_(b'uncompressed size of bundle content:\n'))
1503 size = 0
1502 size = 0
1504
1503
1505 sidedata_helpers = None
1504 sidedata_helpers = None
1506 if self.version == b'04':
1505 if self.version == b'04':
1507 remote_sidedata = self._remote_sidedata
1506 remote_sidedata = self._remote_sidedata
1508 if source == b'strip':
1507 if source == b'strip':
1509 # We're our own remote when stripping, get the no-op helpers
1508 # We're our own remote when stripping, get the no-op helpers
1510 # TODO a better approach would be for the strip bundle to
1509 # TODO a better approach would be for the strip bundle to
1511 # correctly advertise its sidedata categories directly.
1510 # correctly advertise its sidedata categories directly.
1512 remote_sidedata = repo._wanted_sidedata
1511 remote_sidedata = repo._wanted_sidedata
1513 sidedata_helpers = sidedatamod.get_sidedata_helpers(
1512 sidedata_helpers = sidedatamod.get_sidedata_helpers(
1514 repo,
1513 repo,
1515 remote_sidedata,
1514 remote_sidedata,
1516 )
1515 )
1517
1516
1518 cl_debug_info = None
1517 cl_debug_info = None
1519 if debug_info is not None:
1518 if debug_info is not None:
1520 cl_debug_info = make_debug_info()
1519 cl_debug_info = make_debug_info()
1521 clstate, deltas = self._generatechangelog(
1520 clstate, deltas = self._generatechangelog(
1522 cl,
1521 cl,
1523 clnodes,
1522 clnodes,
1524 generate=changelog,
1523 generate=changelog,
1525 sidedata_helpers=sidedata_helpers,
1524 sidedata_helpers=sidedata_helpers,
1526 debug_info=cl_debug_info,
1525 debug_info=cl_debug_info,
1527 )
1526 )
1528 for delta in deltas:
1527 for delta in deltas:
1529 for chunk in _revisiondeltatochunks(
1528 for chunk in _revisiondeltatochunks(
1530 self._repo, delta, self._builddeltaheader
1529 self._repo, delta, self._builddeltaheader
1531 ):
1530 ):
1532 size += len(chunk)
1531 size += len(chunk)
1533 yield chunk
1532 yield chunk
1534
1533
1535 close = closechunk()
1534 close = closechunk()
1536 size += len(close)
1535 size += len(close)
1537 yield closechunk()
1536 yield closechunk()
1538 if debug_info is not None:
1537 if debug_info is not None:
1539 merge_debug_info(debug_info, cl_debug_info)
1538 merge_debug_info(debug_info, cl_debug_info)
1540 debug_info['revision-changelog'] = cl_debug_info['revision-total']
1539 debug_info['revision-changelog'] = cl_debug_info['revision-total']
1541
1540
1542 self._verbosenote(_(b'%8.i (changelog)\n') % size)
1541 self._verbosenote(_(b'%8.i (changelog)\n') % size)
1543
1542
1544 clrevorder = clstate[b'clrevorder']
1543 clrevorder = clstate[b'clrevorder']
1545 manifests = clstate[b'manifests']
1544 manifests = clstate[b'manifests']
1546 changedfiles = clstate[b'changedfiles']
1545 changedfiles = clstate[b'changedfiles']
1547
1546
1548 if debug_info is not None:
1547 if debug_info is not None:
1549 debug_info['file-count'] = len(changedfiles)
1548 debug_info['file-count'] = len(changedfiles)
1550
1549
1551 # We need to make sure that the linkrev in the changegroup refers to
1550 # We need to make sure that the linkrev in the changegroup refers to
1552 # the first changeset that introduced the manifest or file revision.
1551 # the first changeset that introduced the manifest or file revision.
1553 # The fastpath is usually safer than the slowpath, because the filelogs
1552 # The fastpath is usually safer than the slowpath, because the filelogs
1554 # are walked in revlog order.
1553 # are walked in revlog order.
1555 #
1554 #
1556 # When taking the slowpath when the manifest revlog uses generaldelta,
1555 # When taking the slowpath when the manifest revlog uses generaldelta,
1557 # the manifest may be walked in the "wrong" order. Without 'clrevorder',
1556 # the manifest may be walked in the "wrong" order. Without 'clrevorder',
1558 # we would get an incorrect linkrev (see fix in cc0ff93d0c0c).
1557 # we would get an incorrect linkrev (see fix in cc0ff93d0c0c).
1559 #
1558 #
1560 # When taking the fastpath, we are only vulnerable to reordering
1559 # When taking the fastpath, we are only vulnerable to reordering
1561 # of the changelog itself. The changelog never uses generaldelta and is
1560 # of the changelog itself. The changelog never uses generaldelta and is
1562 # never reordered. To handle this case, we simply take the slowpath,
1561 # never reordered. To handle this case, we simply take the slowpath,
1563 # which already has the 'clrevorder' logic. This was also fixed in
1562 # which already has the 'clrevorder' logic. This was also fixed in
1564 # cc0ff93d0c0c.
1563 # cc0ff93d0c0c.
1565
1564
1566 # Treemanifests don't work correctly with fastpathlinkrev
1565 # Treemanifests don't work correctly with fastpathlinkrev
1567 # either, because we don't discover which directory nodes to
1566 # either, because we don't discover which directory nodes to
1568 # send along with files. This could probably be fixed.
1567 # send along with files. This could probably be fixed.
1569 fastpathlinkrev = fastpathlinkrev and not scmutil.istreemanifest(repo)
1568 fastpathlinkrev = fastpathlinkrev and not scmutil.istreemanifest(repo)
1570
1569
1571 fnodes = {} # needed file nodes
1570 fnodes = {} # needed file nodes
1572
1571
1573 size = 0
1572 size = 0
1574 mn_debug_info = None
1573 mn_debug_info = None
1575 if debug_info is not None:
1574 if debug_info is not None:
1576 mn_debug_info = make_debug_info()
1575 mn_debug_info = make_debug_info()
1577 it = self.generatemanifests(
1576 it = self.generatemanifests(
1578 commonrevs,
1577 commonrevs,
1579 clrevorder,
1578 clrevorder,
1580 fastpathlinkrev,
1579 fastpathlinkrev,
1581 manifests,
1580 manifests,
1582 fnodes,
1581 fnodes,
1583 source,
1582 source,
1584 clstate[b'clrevtomanifestrev'],
1583 clstate[b'clrevtomanifestrev'],
1585 sidedata_helpers=sidedata_helpers,
1584 sidedata_helpers=sidedata_helpers,
1586 debug_info=mn_debug_info,
1585 debug_info=mn_debug_info,
1587 )
1586 )
1588
1587
1589 for tree, deltas in it:
1588 for tree, deltas in it:
1590 if tree:
1589 if tree:
1591 assert self.version in (b'03', b'04')
1590 assert self.version in (b'03', b'04')
1592 chunk = _fileheader(tree)
1591 chunk = _fileheader(tree)
1593 size += len(chunk)
1592 size += len(chunk)
1594 yield chunk
1593 yield chunk
1595
1594
1596 for delta in deltas:
1595 for delta in deltas:
1597 chunks = _revisiondeltatochunks(
1596 chunks = _revisiondeltatochunks(
1598 self._repo, delta, self._builddeltaheader
1597 self._repo, delta, self._builddeltaheader
1599 )
1598 )
1600 for chunk in chunks:
1599 for chunk in chunks:
1601 size += len(chunk)
1600 size += len(chunk)
1602 yield chunk
1601 yield chunk
1603
1602
1604 close = closechunk()
1603 close = closechunk()
1605 size += len(close)
1604 size += len(close)
1606 yield close
1605 yield close
1607 if debug_info is not None:
1606 if debug_info is not None:
1608 merge_debug_info(debug_info, mn_debug_info)
1607 merge_debug_info(debug_info, mn_debug_info)
1609 debug_info['revision-manifest'] = mn_debug_info['revision-total']
1608 debug_info['revision-manifest'] = mn_debug_info['revision-total']
1610
1609
1611 self._verbosenote(_(b'%8.i (manifests)\n') % size)
1610 self._verbosenote(_(b'%8.i (manifests)\n') % size)
1612 yield self._manifestsend
1611 yield self._manifestsend
1613
1612
1614 mfdicts = None
1613 mfdicts = None
1615 if self._ellipses and self._isshallow:
1614 if self._ellipses and self._isshallow:
1616 mfdicts = [
1615 mfdicts = [
1617 (repo.manifestlog[n].read(), lr)
1616 (repo.manifestlog[n].read(), lr)
1618 for (n, lr) in pycompat.iteritems(manifests)
1617 for (n, lr) in pycompat.iteritems(manifests)
1619 ]
1618 ]
1620
1619
1621 manifests.clear()
1620 manifests.clear()
1622 clrevs = {cl.rev(x) for x in clnodes}
1621 clrevs = {cl.rev(x) for x in clnodes}
1623
1622
1624 fl_debug_info = None
1623 fl_debug_info = None
1625 if debug_info is not None:
1624 if debug_info is not None:
1626 fl_debug_info = make_debug_info()
1625 fl_debug_info = make_debug_info()
1627 it = self.generatefiles(
1626 it = self.generatefiles(
1628 changedfiles,
1627 changedfiles,
1629 commonrevs,
1628 commonrevs,
1630 source,
1629 source,
1631 mfdicts,
1630 mfdicts,
1632 fastpathlinkrev,
1631 fastpathlinkrev,
1633 fnodes,
1632 fnodes,
1634 clrevs,
1633 clrevs,
1635 sidedata_helpers=sidedata_helpers,
1634 sidedata_helpers=sidedata_helpers,
1636 debug_info=fl_debug_info,
1635 debug_info=fl_debug_info,
1637 )
1636 )
1638
1637
1639 for path, deltas in it:
1638 for path, deltas in it:
1640 h = _fileheader(path)
1639 h = _fileheader(path)
1641 size = len(h)
1640 size = len(h)
1642 yield h
1641 yield h
1643
1642
1644 for delta in deltas:
1643 for delta in deltas:
1645 chunks = _revisiondeltatochunks(
1644 chunks = _revisiondeltatochunks(
1646 self._repo, delta, self._builddeltaheader
1645 self._repo, delta, self._builddeltaheader
1647 )
1646 )
1648 for chunk in chunks:
1647 for chunk in chunks:
1649 size += len(chunk)
1648 size += len(chunk)
1650 yield chunk
1649 yield chunk
1651
1650
1652 close = closechunk()
1651 close = closechunk()
1653 size += len(close)
1652 size += len(close)
1654 yield close
1653 yield close
1655
1654
1656 self._verbosenote(_(b'%8.i %s\n') % (size, path))
1655 self._verbosenote(_(b'%8.i %s\n') % (size, path))
1657
1656
1658 yield closechunk()
1657 yield closechunk()
1659 if debug_info is not None:
1658 if debug_info is not None:
1660 merge_debug_info(debug_info, fl_debug_info)
1659 merge_debug_info(debug_info, fl_debug_info)
1661 debug_info['revision-files'] = fl_debug_info['revision-total']
1660 debug_info['revision-files'] = fl_debug_info['revision-total']
1662
1661
1663 if debug_info is not None:
1662 if debug_info is not None:
1664 display_bundling_debug_info(
1663 display_bundling_debug_info(
1665 repo.ui,
1664 repo.ui,
1666 debug_info,
1665 debug_info,
1667 cl_debug_info,
1666 cl_debug_info,
1668 mn_debug_info,
1667 mn_debug_info,
1669 fl_debug_info,
1668 fl_debug_info,
1670 )
1669 )
1671
1670
1672 if clnodes:
1671 if clnodes:
1673 repo.hook(b'outgoing', node=hex(clnodes[0]), source=source)
1672 repo.hook(b'outgoing', node=hex(clnodes[0]), source=source)
1674
1673
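# Illustrative caller-side sketch, not part of the class: the method above is
# a generator of raw byte chunks (changelog, then manifests per tree, then
# filelogs per path, with empty chunks closing each group), so a caller can
# simply concatenate what it yields. `packer`, `commonrevs`, `clnodes` and
# `source` are assumed to be provided by the caller; fastpathlinkrev is
# passed as False purely for illustration.
def _collect_changegroup(packer, commonrevs, clnodes, source):
    return b''.join(
        packer.generate(commonrevs, clnodes, False, source)
    )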
1675 def _generatechangelog(
1674 def _generatechangelog(
1676 self,
1675 self,
1677 cl,
1676 cl,
1678 nodes,
1677 nodes,
1679 generate=True,
1678 generate=True,
1680 sidedata_helpers=None,
1679 sidedata_helpers=None,
1681 debug_info=None,
1680 debug_info=None,
1682 ):
1681 ):
1683 """Generate data for changelog chunks.
1682 """Generate data for changelog chunks.
1684
1683
1685 Returns a 2-tuple of a dict containing state and an iterable of
1684 Returns a 2-tuple of a dict containing state and an iterable of
1686 byte chunks. The state will not be fully populated until the
1685 byte chunks. The state will not be fully populated until the
1687 chunk stream has been fully consumed.
1686 chunk stream has been fully consumed.
1688
1687
1689 if generate is False, the state will be fully populated and no chunk
1688 if generate is False, the state will be fully populated and no chunk
1690 stream will be yielded
1689 stream will be yielded
1691
1690
1692 See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
1691 See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
1693 `sidedata_helpers`.
1692 `sidedata_helpers`.
1694 """
1693 """
1695 clrevorder = {}
1694 clrevorder = {}
1696 manifests = {}
1695 manifests = {}
1697 mfl = self._repo.manifestlog
1696 mfl = self._repo.manifestlog
1698 changedfiles = set()
1697 changedfiles = set()
1699 clrevtomanifestrev = {}
1698 clrevtomanifestrev = {}
1700
1699
1701 state = {
1700 state = {
1702 b'clrevorder': clrevorder,
1701 b'clrevorder': clrevorder,
1703 b'manifests': manifests,
1702 b'manifests': manifests,
1704 b'changedfiles': changedfiles,
1703 b'changedfiles': changedfiles,
1705 b'clrevtomanifestrev': clrevtomanifestrev,
1704 b'clrevtomanifestrev': clrevtomanifestrev,
1706 }
1705 }
1707
1706
1708 if not (generate or self._ellipses):
1707 if not (generate or self._ellipses):
1709 # sort the nodes in storage order
1708 # sort the nodes in storage order
1710 nodes = sorted(nodes, key=cl.rev)
1709 nodes = sorted(nodes, key=cl.rev)
1711 for node in nodes:
1710 for node in nodes:
1712 c = cl.changelogrevision(node)
1711 c = cl.changelogrevision(node)
1713 clrevorder[node] = len(clrevorder)
1712 clrevorder[node] = len(clrevorder)
1714 # record the first changeset introducing this manifest version
1713 # record the first changeset introducing this manifest version
1715 manifests.setdefault(c.manifest, node)
1714 manifests.setdefault(c.manifest, node)
1716 # Record a complete list of potentially-changed files in
1715 # Record a complete list of potentially-changed files in
1717 # this manifest.
1716 # this manifest.
1718 changedfiles.update(c.files)
1717 changedfiles.update(c.files)
1719
1718
1720 return state, ()
1719 return state, ()
1721
1720
1722 # Callback for the changelog, used to collect changed files and
1721 # Callback for the changelog, used to collect changed files and
1723 # manifest nodes.
1722 # manifest nodes.
1724 # Returns the linkrev node (identity in the changelog case).
1723 # Returns the linkrev node (identity in the changelog case).
1725 def lookupcl(x):
1724 def lookupcl(x):
1726 c = cl.changelogrevision(x)
1725 c = cl.changelogrevision(x)
1727 clrevorder[x] = len(clrevorder)
1726 clrevorder[x] = len(clrevorder)
1728
1727
1729 if self._ellipses:
1728 if self._ellipses:
1730 # Only update manifests if x is going to be sent. Otherwise we
1729 # Only update manifests if x is going to be sent. Otherwise we
1731 # end up with bogus linkrevs specified for manifests and
1730 # end up with bogus linkrevs specified for manifests and
1732 # we skip some manifest nodes that we should otherwise
1731 # we skip some manifest nodes that we should otherwise
1733 # have sent.
1732 # have sent.
1734 if (
1733 if (
1735 x in self._fullclnodes
1734 x in self._fullclnodes
1736 or cl.rev(x) in self._precomputedellipsis
1735 or cl.rev(x) in self._precomputedellipsis
1737 ):
1736 ):
1738
1739 manifestnode = c.manifest
1737 manifestnode = c.manifest
1740 # Record the first changeset introducing this manifest
1738 # Record the first changeset introducing this manifest
1741 # version.
1739 # version.
1742 manifests.setdefault(manifestnode, x)
1740 manifests.setdefault(manifestnode, x)
1743 # Set this narrow-specific dict so we have the lowest
1741 # Set this narrow-specific dict so we have the lowest
1744 # manifest revnum to look up for this cl revnum. (Part of
1742 # manifest revnum to look up for this cl revnum. (Part of
1745 # mapping changelog ellipsis parents to manifest ellipsis
1743 # mapping changelog ellipsis parents to manifest ellipsis
1746 # parents)
1744 # parents)
1747 clrevtomanifestrev.setdefault(
1745 clrevtomanifestrev.setdefault(
1748 cl.rev(x), mfl.rev(manifestnode)
1746 cl.rev(x), mfl.rev(manifestnode)
1749 )
1747 )
1750 # We can't trust the changed files list in the changeset if the
1748 # We can't trust the changed files list in the changeset if the
1751 # client requested a shallow clone.
1749 # client requested a shallow clone.
1752 if self._isshallow:
1750 if self._isshallow:
1753 changedfiles.update(mfl[c.manifest].read().keys())
1751 changedfiles.update(mfl[c.manifest].read().keys())
1754 else:
1752 else:
1755 changedfiles.update(c.files)
1753 changedfiles.update(c.files)
1756 else:
1754 else:
1757 # record the first changeset introducing this manifest version
1755 # record the first changeset introducing this manifest version
1758 manifests.setdefault(c.manifest, x)
1756 manifests.setdefault(c.manifest, x)
1759 # Record a complete list of potentially-changed files in
1757 # Record a complete list of potentially-changed files in
1760 # this manifest.
1758 # this manifest.
1761 changedfiles.update(c.files)
1759 changedfiles.update(c.files)
1762
1760
1763 return x
1761 return x
1764
1762
1765 gen = deltagroup(
1763 gen = deltagroup(
1766 self._repo,
1764 self._repo,
1767 cl,
1765 cl,
1768 nodes,
1766 nodes,
1769 True,
1767 True,
1770 lookupcl,
1768 lookupcl,
1771 self._forcedeltaparentprev,
1769 self._forcedeltaparentprev,
1772 ellipses=self._ellipses,
1770 ellipses=self._ellipses,
1773 topic=_(b'changesets'),
1771 topic=_(b'changesets'),
1774 clrevtolocalrev={},
1772 clrevtolocalrev={},
1775 fullclnodes=self._fullclnodes,
1773 fullclnodes=self._fullclnodes,
1776 precomputedellipsis=self._precomputedellipsis,
1774 precomputedellipsis=self._precomputedellipsis,
1777 sidedata_helpers=sidedata_helpers,
1775 sidedata_helpers=sidedata_helpers,
1778 debug_info=debug_info,
1776 debug_info=debug_info,
1779 )
1777 )
1780
1778
1781 return state, gen
1779 return state, gen
1782
1780
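# Minimal sketch of the generate=False contract documented above (assuming
# the packer is not in ellipsis mode): the state dictionary is populated
# immediately and the second return value is an empty iterable, so a caller
# interested only in the changed-files set never streams any chunks.
# `packer`, `cl` and `clnodes` are assumed to exist; this standalone helper
# is shown for illustration only.
def _changed_files_only(packer, cl, clnodes):
    state, deltas = packer._generatechangelog(cl, clnodes, generate=False)
    assert deltas == ()
    return state[b'changedfiles']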
1783 def generatemanifests(
1781 def generatemanifests(
1784 self,
1782 self,
1785 commonrevs,
1783 commonrevs,
1786 clrevorder,
1784 clrevorder,
1787 fastpathlinkrev,
1785 fastpathlinkrev,
1788 manifests,
1786 manifests,
1789 fnodes,
1787 fnodes,
1790 source,
1788 source,
1791 clrevtolocalrev,
1789 clrevtolocalrev,
1792 sidedata_helpers=None,
1790 sidedata_helpers=None,
1793 debug_info=None,
1791 debug_info=None,
1794 ):
1792 ):
1795 """Returns an iterator of changegroup chunks containing manifests.
1793 """Returns an iterator of changegroup chunks containing manifests.
1796
1794
1797 `source` is unused here, but is used by extensions like remotefilelog to
1795 `source` is unused here, but is used by extensions like remotefilelog to
1798 change what is sent based on pulls vs pushes, etc.
1796 change what is sent based on pulls vs pushes, etc.
1799
1797
1800 See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
1798 See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
1801 `sidedata_helpers`.
1799 `sidedata_helpers`.
1802 """
1800 """
1803 repo = self._repo
1801 repo = self._repo
1804 mfl = repo.manifestlog
1802 mfl = repo.manifestlog
1805 tmfnodes = {b'': manifests}
1803 tmfnodes = {b'': manifests}
1806
1804
1807 # Callback for the manifest, used to collect linkrevs for filelog
1805 # Callback for the manifest, used to collect linkrevs for filelog
1808 # revisions.
1806 # revisions.
1809 # Returns the linkrev node (collected in lookupcl).
1807 # Returns the linkrev node (collected in lookupcl).
1810 def makelookupmflinknode(tree, nodes):
1808 def makelookupmflinknode(tree, nodes):
1811 if fastpathlinkrev:
1809 if fastpathlinkrev:
1812 assert not tree
1810 assert not tree
1813
1811
1814 # pytype: disable=unsupported-operands
1812 # pytype: disable=unsupported-operands
1815 return manifests.__getitem__
1813 return manifests.__getitem__
1816 # pytype: enable=unsupported-operands
1814 # pytype: enable=unsupported-operands
1817
1815
1818 def lookupmflinknode(x):
1816 def lookupmflinknode(x):
1819 """Callback for looking up the linknode for manifests.
1817 """Callback for looking up the linknode for manifests.
1820
1818
1821 Returns the linkrev node for the specified manifest.
1819 Returns the linkrev node for the specified manifest.
1822
1820
1823 SIDE EFFECT:
1821 SIDE EFFECT:
1824
1822
1825 1) fclnodes gets populated with the list of relevant
1823 1) fclnodes gets populated with the list of relevant
1826 file nodes if we're not using fastpathlinkrev
1824 file nodes if we're not using fastpathlinkrev
1827 2) When treemanifests are in use, collects treemanifest nodes
1825 2) When treemanifests are in use, collects treemanifest nodes
1828 to send
1826 to send
1829
1827
1830 Note that this means manifests must be completely sent to
1828 Note that this means manifests must be completely sent to
1831 the client before you can trust the list of files and
1829 the client before you can trust the list of files and
1832 treemanifests to send.
1830 treemanifests to send.
1833 """
1831 """
1834 clnode = nodes[x]
1832 clnode = nodes[x]
1835 mdata = mfl.get(tree, x).readfast(shallow=True)
1833 mdata = mfl.get(tree, x).readfast(shallow=True)
1836 for p, n, fl in mdata.iterentries():
1834 for p, n, fl in mdata.iterentries():
1837 if fl == b't': # subdirectory manifest
1835 if fl == b't': # subdirectory manifest
1838 subtree = tree + p + b'/'
1836 subtree = tree + p + b'/'
1839 tmfclnodes = tmfnodes.setdefault(subtree, {})
1837 tmfclnodes = tmfnodes.setdefault(subtree, {})
1840 tmfclnode = tmfclnodes.setdefault(n, clnode)
1838 tmfclnode = tmfclnodes.setdefault(n, clnode)
1841 if clrevorder[clnode] < clrevorder[tmfclnode]:
1839 if clrevorder[clnode] < clrevorder[tmfclnode]:
1842 tmfclnodes[n] = clnode
1840 tmfclnodes[n] = clnode
1843 else:
1841 else:
1844 f = tree + p
1842 f = tree + p
1845 fclnodes = fnodes.setdefault(f, {})
1843 fclnodes = fnodes.setdefault(f, {})
1846 fclnode = fclnodes.setdefault(n, clnode)
1844 fclnode = fclnodes.setdefault(n, clnode)
1847 if clrevorder[clnode] < clrevorder[fclnode]:
1845 if clrevorder[clnode] < clrevorder[fclnode]:
1848 fclnodes[n] = clnode
1846 fclnodes[n] = clnode
1849 return clnode
1847 return clnode
1850
1848
1851 return lookupmflinknode
1849 return lookupmflinknode
1852
1850
1853 while tmfnodes:
1851 while tmfnodes:
1854 tree, nodes = tmfnodes.popitem()
1852 tree, nodes = tmfnodes.popitem()
1855
1853
1856 should_visit = self._matcher.visitdir(tree[:-1])
1854 should_visit = self._matcher.visitdir(tree[:-1])
1857 if tree and not should_visit:
1855 if tree and not should_visit:
1858 continue
1856 continue
1859
1857
1860 store = mfl.getstorage(tree)
1858 store = mfl.getstorage(tree)
1861
1859
1862 if not should_visit:
1860 if not should_visit:
1863 # No nodes to send because this directory is out of
1861 # No nodes to send because this directory is out of
1864 # the client's view of the repository (probably
1862 # the client's view of the repository (probably
1865 # because of narrow clones). Do this even for the root
1863 # because of narrow clones). Do this even for the root
1866 # directory (tree=='')
1864 # directory (tree=='')
1867 prunednodes = []
1865 prunednodes = []
1868 else:
1866 else:
1869 # Avoid sending any manifest nodes we can prove the
1867 # Avoid sending any manifest nodes we can prove the
1870 # client already has by checking linkrevs. See the
1868 # client already has by checking linkrevs. See the
1871 # related comment in generatefiles().
1869 # related comment in generatefiles().
1872 prunednodes = self._prunemanifests(store, nodes, commonrevs)
1870 prunednodes = self._prunemanifests(store, nodes, commonrevs)
1873
1871
1874 if tree and not prunednodes:
1872 if tree and not prunednodes:
1875 continue
1873 continue
1876
1874
1877 lookupfn = makelookupmflinknode(tree, nodes)
1875 lookupfn = makelookupmflinknode(tree, nodes)
1878
1876
1879 deltas = deltagroup(
1877 deltas = deltagroup(
1880 self._repo,
1878 self._repo,
1881 store,
1879 store,
1882 prunednodes,
1880 prunednodes,
1883 False,
1881 False,
1884 lookupfn,
1882 lookupfn,
1885 self._forcedeltaparentprev,
1883 self._forcedeltaparentprev,
1886 ellipses=self._ellipses,
1884 ellipses=self._ellipses,
1887 topic=_(b'manifests'),
1885 topic=_(b'manifests'),
1888 clrevtolocalrev=clrevtolocalrev,
1886 clrevtolocalrev=clrevtolocalrev,
1889 fullclnodes=self._fullclnodes,
1887 fullclnodes=self._fullclnodes,
1890 precomputedellipsis=self._precomputedellipsis,
1888 precomputedellipsis=self._precomputedellipsis,
1891 sidedata_helpers=sidedata_helpers,
1889 sidedata_helpers=sidedata_helpers,
1892 debug_info=debug_info,
1890 debug_info=debug_info,
1893 )
1891 )
1894
1892
1895 if not self._oldmatcher.visitdir(store.tree[:-1]):
1893 if not self._oldmatcher.visitdir(store.tree[:-1]):
1896 yield tree, deltas
1894 yield tree, deltas
1897 else:
1895 else:
1898 # 'deltas' is a generator and we need to consume it even if
1896 # 'deltas' is a generator and we need to consume it even if
1899 # we are not going to send it because a side-effect is that
1897 # we are not going to send it because a side-effect is that
1900 # it updates tmfnodes (via lookupfn)
1898 # it updates tmfnodes (via lookupfn)
1901 for d in deltas:
1899 for d in deltas:
1902 pass
1900 pass
1903 if not tree:
1901 if not tree:
1904 yield tree, []
1902 yield tree, []
1905
1903
1906 def _prunemanifests(self, store, nodes, commonrevs):
1904 def _prunemanifests(self, store, nodes, commonrevs):
1907 if not self._ellipses:
1905 if not self._ellipses:
1908 # In the non-ellipses case, especially for large repositories, it is
1906 # In the non-ellipses case, especially for large repositories, it is
1909 # better to avoid calling store.rev and store.linkrev on a lot of
1907 # better to avoid calling store.rev and store.linkrev on a lot of
1910 # nodes than to save the little extra data we might send
1908 # nodes than to save the little extra data we might send
1911 return nodes.copy()
1909 return nodes.copy()
1912 # This is split out as a separate method to allow filtering
1910 # This is split out as a separate method to allow filtering
1913 # commonrevs in extension code.
1911 # commonrevs in extension code.
1914 #
1912 #
1915 # TODO(augie): this shouldn't be required, instead we should
1913 # TODO(augie): this shouldn't be required, instead we should
1916 # make filtering of revisions to send delegated to the store
1914 # make filtering of revisions to send delegated to the store
1917 # layer.
1915 # layer.
1918 frev, flr = store.rev, store.linkrev
1916 frev, flr = store.rev, store.linkrev
1919 return [n for n in nodes if flr(frev(n)) not in commonrevs]
1917 return [n for n in nodes if flr(frev(n)) not in commonrevs]
1920
1918
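# Editor's illustration (not part of the original module): a minimal,
# self-contained sketch of the linkrev-based pruning performed by
# _prunemanifests() above, with plain dicts standing in for the store's
# rev()/linkrev() lookups.  All names below are hypothetical.
toy_rev = {b'n1': 0, b'n2': 1, b'n3': 2}  # node -> local revision number
toy_linkrev = {0: 10, 1: 11, 2: 12}  # local revision -> changelog revision
toy_commonrevs = {10, 12}  # changelog revisions the client already has
pruned = [n for n in toy_rev if toy_linkrev[toy_rev[n]] not in toy_commonrevs]
assert pruned == [b'n2']  # only the node whose linkrev the client lacks is kept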
1921 # The 'source' parameter is useful for extensions
1919 # The 'source' parameter is useful for extensions
1922 def generatefiles(
1920 def generatefiles(
1923 self,
1921 self,
1924 changedfiles,
1922 changedfiles,
1925 commonrevs,
1923 commonrevs,
1926 source,
1924 source,
1927 mfdicts,
1925 mfdicts,
1928 fastpathlinkrev,
1926 fastpathlinkrev,
1929 fnodes,
1927 fnodes,
1930 clrevs,
1928 clrevs,
1931 sidedata_helpers=None,
1929 sidedata_helpers=None,
1932 debug_info=None,
1930 debug_info=None,
1933 ):
1931 ):
1934 changedfiles = [
1932 changedfiles = [
1935 f
1933 f
1936 for f in changedfiles
1934 for f in changedfiles
1937 if self._matcher(f) and not self._oldmatcher(f)
1935 if self._matcher(f) and not self._oldmatcher(f)
1938 ]
1936 ]
1939
1937
1940 if not fastpathlinkrev:
1938 if not fastpathlinkrev:
1941
1939
1942 def normallinknodes(unused, fname):
1940 def normallinknodes(unused, fname):
1943 return fnodes.get(fname, {})
1941 return fnodes.get(fname, {})
1944
1942
1945 else:
1943 else:
1946 cln = self._repo.changelog.node
1944 cln = self._repo.changelog.node
1947
1945
1948 def normallinknodes(store, fname):
1946 def normallinknodes(store, fname):
1949 flinkrev = store.linkrev
1947 flinkrev = store.linkrev
1950 fnode = store.node
1948 fnode = store.node
1951 revs = ((r, flinkrev(r)) for r in store)
1949 revs = ((r, flinkrev(r)) for r in store)
1952 return {fnode(r): cln(lr) for r, lr in revs if lr in clrevs}
1950 return {fnode(r): cln(lr) for r, lr in revs if lr in clrevs}
1953
1951
1954 clrevtolocalrev = {}
1952 clrevtolocalrev = {}
1955
1953
1956 if self._isshallow:
1954 if self._isshallow:
1957 # In a shallow clone, the linknodes callback needs to also include
1955 # In a shallow clone, the linknodes callback needs to also include
1958 # those file nodes that are in the manifests we sent but weren't
1956 # those file nodes that are in the manifests we sent but weren't
1959 # introduced by those manifests.
1957 # introduced by those manifests.
1960 commonctxs = [self._repo[c] for c in commonrevs]
1958 commonctxs = [self._repo[c] for c in commonrevs]
1961 clrev = self._repo.changelog.rev
1959 clrev = self._repo.changelog.rev
1962
1960
1963 def linknodes(flog, fname):
1961 def linknodes(flog, fname):
1964 for c in commonctxs:
1962 for c in commonctxs:
1965 try:
1963 try:
1966 fnode = c.filenode(fname)
1964 fnode = c.filenode(fname)
1967 clrevtolocalrev[c.rev()] = flog.rev(fnode)
1965 clrevtolocalrev[c.rev()] = flog.rev(fnode)
1968 except error.ManifestLookupError:
1966 except error.ManifestLookupError:
1969 pass
1967 pass
1970 links = normallinknodes(flog, fname)
1968 links = normallinknodes(flog, fname)
1971 if len(links) != len(mfdicts):
1969 if len(links) != len(mfdicts):
1972 for mf, lr in mfdicts:
1970 for mf, lr in mfdicts:
1973 fnode = mf.get(fname, None)
1971 fnode = mf.get(fname, None)
1974 if fnode in links:
1972 if fnode in links:
1975 links[fnode] = min(links[fnode], lr, key=clrev)
1973 links[fnode] = min(links[fnode], lr, key=clrev)
1976 elif fnode:
1974 elif fnode:
1977 links[fnode] = lr
1975 links[fnode] = lr
1978 return links
1976 return links
1979
1977
1980 else:
1978 else:
1981 linknodes = normallinknodes
1979 linknodes = normallinknodes
1982
1980
1983 repo = self._repo
1981 repo = self._repo
1984 progress = repo.ui.makeprogress(
1982 progress = repo.ui.makeprogress(
1985 _(b'files'), unit=_(b'files'), total=len(changedfiles)
1983 _(b'files'), unit=_(b'files'), total=len(changedfiles)
1986 )
1984 )
1987 for i, fname in enumerate(sorted(changedfiles)):
1985 for i, fname in enumerate(sorted(changedfiles)):
1988 filerevlog = repo.file(fname)
1986 filerevlog = repo.file(fname)
1989 if not filerevlog:
1987 if not filerevlog:
1990 raise error.Abort(
1988 raise error.Abort(
1991 _(b"empty or missing file data for %s") % fname
1989 _(b"empty or missing file data for %s") % fname
1992 )
1990 )
1993
1991
1994 clrevtolocalrev.clear()
1992 clrevtolocalrev.clear()
1995
1993
1996 linkrevnodes = linknodes(filerevlog, fname)
1994 linkrevnodes = linknodes(filerevlog, fname)
1995
1997 # Lookup for filenodes; we collected the linkrev nodes above in the
1996 # Lookup for filenodes; we collected the linkrev nodes above in the
1998 # fastpath case and with lookupmf in the slowpath case.
1997 # fastpath case and with lookupmf in the slowpath case.
1999 def lookupfilelog(x):
1998 def lookupfilelog(x):
2000 return linkrevnodes[x]
1999 return linkrevnodes[x]
2001
2000
2002 frev, flr = filerevlog.rev, filerevlog.linkrev
2001 frev, flr = filerevlog.rev, filerevlog.linkrev
2003 # Skip sending any filenode we know the client already
2002 # Skip sending any filenode we know the client already
2004 # has. This avoids over-sending files relatively
2003 # has. This avoids over-sending files relatively
2005 # inexpensively, so it's not a problem if we under-filter
2004 # inexpensively, so it's not a problem if we under-filter
2006 # here.
2005 # here.
2007 filenodes = [
2006 filenodes = [
2008 n for n in linkrevnodes if flr(frev(n)) not in commonrevs
2007 n for n in linkrevnodes if flr(frev(n)) not in commonrevs
2009 ]
2008 ]
2010
2009
2011 if not filenodes:
2010 if not filenodes:
2012 continue
2011 continue
2013
2012
2014 progress.update(i + 1, item=fname)
2013 progress.update(i + 1, item=fname)
2015
2014
2016 deltas = deltagroup(
2015 deltas = deltagroup(
2017 self._repo,
2016 self._repo,
2018 filerevlog,
2017 filerevlog,
2019 filenodes,
2018 filenodes,
2020 False,
2019 False,
2021 lookupfilelog,
2020 lookupfilelog,
2022 self._forcedeltaparentprev,
2021 self._forcedeltaparentprev,
2023 ellipses=self._ellipses,
2022 ellipses=self._ellipses,
2024 clrevtolocalrev=clrevtolocalrev,
2023 clrevtolocalrev=clrevtolocalrev,
2025 fullclnodes=self._fullclnodes,
2024 fullclnodes=self._fullclnodes,
2026 precomputedellipsis=self._precomputedellipsis,
2025 precomputedellipsis=self._precomputedellipsis,
2027 sidedata_helpers=sidedata_helpers,
2026 sidedata_helpers=sidedata_helpers,
2028 debug_info=debug_info,
2027 debug_info=debug_info,
2029 )
2028 )
2030
2029
2031 yield fname, deltas
2030 yield fname, deltas
2032
2031
2033 progress.complete()
2032 progress.complete()
2034
2033
2035
2034
2036 def _makecg1packer(
2035 def _makecg1packer(
2037 repo,
2036 repo,
2038 oldmatcher,
2037 oldmatcher,
2039 matcher,
2038 matcher,
2040 bundlecaps,
2039 bundlecaps,
2041 ellipses=False,
2040 ellipses=False,
2042 shallow=False,
2041 shallow=False,
2043 ellipsisroots=None,
2042 ellipsisroots=None,
2044 fullnodes=None,
2043 fullnodes=None,
2045 remote_sidedata=None,
2044 remote_sidedata=None,
2046 ):
2045 ):
2047 builddeltaheader = lambda d: _CHANGEGROUPV1_DELTA_HEADER.pack(
2046 builddeltaheader = lambda d: _CHANGEGROUPV1_DELTA_HEADER.pack(
2048 d.node, d.p1node, d.p2node, d.linknode
2047 d.node, d.p1node, d.p2node, d.linknode
2049 )
2048 )
2050
2049
2051 return cgpacker(
2050 return cgpacker(
2052 repo,
2051 repo,
2053 oldmatcher,
2052 oldmatcher,
2054 matcher,
2053 matcher,
2055 b'01',
2054 b'01',
2056 builddeltaheader=builddeltaheader,
2055 builddeltaheader=builddeltaheader,
2057 manifestsend=b'',
2056 manifestsend=b'',
2058 forcedeltaparentprev=True,
2057 forcedeltaparentprev=True,
2059 bundlecaps=bundlecaps,
2058 bundlecaps=bundlecaps,
2060 ellipses=ellipses,
2059 ellipses=ellipses,
2061 shallow=shallow,
2060 shallow=shallow,
2062 ellipsisroots=ellipsisroots,
2061 ellipsisroots=ellipsisroots,
2063 fullnodes=fullnodes,
2062 fullnodes=fullnodes,
2064 )
2063 )
2065
2064
2066
2065
2067 def _makecg2packer(
2066 def _makecg2packer(
2068 repo,
2067 repo,
2069 oldmatcher,
2068 oldmatcher,
2070 matcher,
2069 matcher,
2071 bundlecaps,
2070 bundlecaps,
2072 ellipses=False,
2071 ellipses=False,
2073 shallow=False,
2072 shallow=False,
2074 ellipsisroots=None,
2073 ellipsisroots=None,
2075 fullnodes=None,
2074 fullnodes=None,
2076 remote_sidedata=None,
2075 remote_sidedata=None,
2077 ):
2076 ):
2078 builddeltaheader = lambda d: _CHANGEGROUPV2_DELTA_HEADER.pack(
2077 builddeltaheader = lambda d: _CHANGEGROUPV2_DELTA_HEADER.pack(
2079 d.node, d.p1node, d.p2node, d.basenode, d.linknode
2078 d.node, d.p1node, d.p2node, d.basenode, d.linknode
2080 )
2079 )
2081
2080
2082 return cgpacker(
2081 return cgpacker(
2083 repo,
2082 repo,
2084 oldmatcher,
2083 oldmatcher,
2085 matcher,
2084 matcher,
2086 b'02',
2085 b'02',
2087 builddeltaheader=builddeltaheader,
2086 builddeltaheader=builddeltaheader,
2088 manifestsend=b'',
2087 manifestsend=b'',
2089 bundlecaps=bundlecaps,
2088 bundlecaps=bundlecaps,
2090 ellipses=ellipses,
2089 ellipses=ellipses,
2091 shallow=shallow,
2090 shallow=shallow,
2092 ellipsisroots=ellipsisroots,
2091 ellipsisroots=ellipsisroots,
2093 fullnodes=fullnodes,
2092 fullnodes=fullnodes,
2094 )
2093 )
2095
2094
2096
2095
2097 def _makecg3packer(
2096 def _makecg3packer(
2098 repo,
2097 repo,
2099 oldmatcher,
2098 oldmatcher,
2100 matcher,
2099 matcher,
2101 bundlecaps,
2100 bundlecaps,
2102 ellipses=False,
2101 ellipses=False,
2103 shallow=False,
2102 shallow=False,
2104 ellipsisroots=None,
2103 ellipsisroots=None,
2105 fullnodes=None,
2104 fullnodes=None,
2106 remote_sidedata=None,
2105 remote_sidedata=None,
2107 ):
2106 ):
2108 builddeltaheader = lambda d: _CHANGEGROUPV3_DELTA_HEADER.pack(
2107 builddeltaheader = lambda d: _CHANGEGROUPV3_DELTA_HEADER.pack(
2109 d.node, d.p1node, d.p2node, d.basenode, d.linknode, d.flags
2108 d.node, d.p1node, d.p2node, d.basenode, d.linknode, d.flags
2110 )
2109 )
2111
2110
2112 return cgpacker(
2111 return cgpacker(
2113 repo,
2112 repo,
2114 oldmatcher,
2113 oldmatcher,
2115 matcher,
2114 matcher,
2116 b'03',
2115 b'03',
2117 builddeltaheader=builddeltaheader,
2116 builddeltaheader=builddeltaheader,
2118 manifestsend=closechunk(),
2117 manifestsend=closechunk(),
2119 bundlecaps=bundlecaps,
2118 bundlecaps=bundlecaps,
2120 ellipses=ellipses,
2119 ellipses=ellipses,
2121 shallow=shallow,
2120 shallow=shallow,
2122 ellipsisroots=ellipsisroots,
2121 ellipsisroots=ellipsisroots,
2123 fullnodes=fullnodes,
2122 fullnodes=fullnodes,
2124 )
2123 )
2125
2124
2126
2125
2127 def _makecg4packer(
2126 def _makecg4packer(
2128 repo,
2127 repo,
2129 oldmatcher,
2128 oldmatcher,
2130 matcher,
2129 matcher,
2131 bundlecaps,
2130 bundlecaps,
2132 ellipses=False,
2131 ellipses=False,
2133 shallow=False,
2132 shallow=False,
2134 ellipsisroots=None,
2133 ellipsisroots=None,
2135 fullnodes=None,
2134 fullnodes=None,
2136 remote_sidedata=None,
2135 remote_sidedata=None,
2137 ):
2136 ):
2138 # Sidedata is in a separate chunk from the delta to differentiate
2137 # Sidedata is in a separate chunk from the delta to differentiate
2139 # "raw delta" and sidedata.
2138 # "raw delta" and sidedata.
2140 def builddeltaheader(d):
2139 def builddeltaheader(d):
2141 return _CHANGEGROUPV4_DELTA_HEADER.pack(
2140 return _CHANGEGROUPV4_DELTA_HEADER.pack(
2142 d.protocol_flags,
2141 d.protocol_flags,
2143 d.node,
2142 d.node,
2144 d.p1node,
2143 d.p1node,
2145 d.p2node,
2144 d.p2node,
2146 d.basenode,
2145 d.basenode,
2147 d.linknode,
2146 d.linknode,
2148 d.flags,
2147 d.flags,
2149 )
2148 )
2150
2149
2151 return cgpacker(
2150 return cgpacker(
2152 repo,
2151 repo,
2153 oldmatcher,
2152 oldmatcher,
2154 matcher,
2153 matcher,
2155 b'04',
2154 b'04',
2156 builddeltaheader=builddeltaheader,
2155 builddeltaheader=builddeltaheader,
2157 manifestsend=closechunk(),
2156 manifestsend=closechunk(),
2158 bundlecaps=bundlecaps,
2157 bundlecaps=bundlecaps,
2159 ellipses=ellipses,
2158 ellipses=ellipses,
2160 shallow=shallow,
2159 shallow=shallow,
2161 ellipsisroots=ellipsisroots,
2160 ellipsisroots=ellipsisroots,
2162 fullnodes=fullnodes,
2161 fullnodes=fullnodes,
2163 remote_sidedata=remote_sidedata,
2162 remote_sidedata=remote_sidedata,
2164 )
2163 )
2165
2164
2166
2165
2167 _packermap = {
2166 _packermap = {
2168 b'01': (_makecg1packer, cg1unpacker),
2167 b'01': (_makecg1packer, cg1unpacker),
2169 # cg2 adds support for exchanging generaldelta
2168 # cg2 adds support for exchanging generaldelta
2170 b'02': (_makecg2packer, cg2unpacker),
2169 b'02': (_makecg2packer, cg2unpacker),
2171 # cg3 adds support for exchanging revlog flags and treemanifests
2170 # cg3 adds support for exchanging revlog flags and treemanifests
2172 b'03': (_makecg3packer, cg3unpacker),
2171 b'03': (_makecg3packer, cg3unpacker),
2173 # cg4 adds support for exchanging sidedata
2172 # cg4 adds support for exchanging sidedata
2174 b'04': (_makecg4packer, cg4unpacker),
2173 b'04': (_makecg4packer, cg4unpacker),
2175 }
2174 }
2176
2175
2177
2176
2178 def allsupportedversions(repo):
2177 def allsupportedversions(repo):
2179 versions = set(_packermap.keys())
2178 versions = set(_packermap.keys())
2180 needv03 = False
2179 needv03 = False
2181 if (
2180 if (
2182 repo.ui.configbool(b'experimental', b'changegroup3')
2181 repo.ui.configbool(b'experimental', b'changegroup3')
2183 or repo.ui.configbool(b'experimental', b'treemanifest')
2182 or repo.ui.configbool(b'experimental', b'treemanifest')
2184 or scmutil.istreemanifest(repo)
2183 or scmutil.istreemanifest(repo)
2185 ):
2184 ):
2186 # we keep version 03 because we need it to exchange treemanifest data
2185 # we keep version 03 because we need it to exchange treemanifest data
2187 #
2186 #
2188 # we also keep versions 01 and 02, because it is possible for a repo to
2187 # we also keep versions 01 and 02, because it is possible for a repo to
2189 # contain both normal and tree manifests at the same time, so using an
2188 # contain both normal and tree manifests at the same time, so using an
2190 # older version to pull data is viable
2189 # older version to pull data is viable
2191 #
2190 #
2192 # (or even to push subset of history)
2191 # (or even to push subset of history)
2193 needv03 = True
2192 needv03 = True
2194 if not needv03:
2193 if not needv03:
2195 versions.discard(b'03')
2194 versions.discard(b'03')
2196 want_v4 = (
2195 want_v4 = (
2197 repo.ui.configbool(b'experimental', b'changegroup4')
2196 repo.ui.configbool(b'experimental', b'changegroup4')
2198 or requirements.REVLOGV2_REQUIREMENT in repo.requirements
2197 or requirements.REVLOGV2_REQUIREMENT in repo.requirements
2199 or requirements.CHANGELOGV2_REQUIREMENT in repo.requirements
2198 or requirements.CHANGELOGV2_REQUIREMENT in repo.requirements
2200 )
2199 )
2201 if not want_v4:
2200 if not want_v4:
2202 versions.discard(b'04')
2201 versions.discard(b'04')
2203 return versions
2202 return versions
2204
2203
2205
2204
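# Editor's note (illustrative sketch, not part of the original module):
# allsupportedversions() above starts from _packermap's keys, and the helpers
# below narrow that set further.  Conceptually, two peers can only exchange a
# changegroup version both of them support; a hypothetical negotiation could
# look like this (in practice the remote side's versions come from its
# advertised capabilities rather than a local call):
local_versions = {b'01', b'02', b'03'}  # e.g. supportedoutgoingversions(repo)
remote_versions = {b'02', b'03', b'04'}  # e.g. parsed from remote capabilities
negotiated = max(local_versions & remote_versions)  # prefer the newest common one
assert negotiated == b'03'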
2206 # Changegroup versions that can be applied to the repo
2205 # Changegroup versions that can be applied to the repo
2207 def supportedincomingversions(repo):
2206 def supportedincomingversions(repo):
2208 return allsupportedversions(repo)
2207 return allsupportedversions(repo)
2209
2208
2210
2209
2211 # Changegroup versions that can be created from the repo
2210 # Changegroup versions that can be created from the repo
2212 def supportedoutgoingversions(repo):
2211 def supportedoutgoingversions(repo):
2213 versions = allsupportedversions(repo)
2212 versions = allsupportedversions(repo)
2214 if scmutil.istreemanifest(repo):
2213 if scmutil.istreemanifest(repo):
2215 # Versions 01 and 02 support only flat manifests and it's just too
2214 # Versions 01 and 02 support only flat manifests and it's just too
2216 # expensive to convert between the flat manifest and tree manifest on
2215 # expensive to convert between the flat manifest and tree manifest on
2217 # the fly. Since tree manifests are hashed differently, all of history
2216 # the fly. Since tree manifests are hashed differently, all of history
2218 # would have to be converted. Instead, we simply don't even pretend to
2217 # would have to be converted. Instead, we simply don't even pretend to
2219 # support versions 01 and 02.
2218 # support versions 01 and 02.
2220 versions.discard(b'01')
2219 versions.discard(b'01')
2221 versions.discard(b'02')
2220 versions.discard(b'02')
2222 if requirements.NARROW_REQUIREMENT in repo.requirements:
2221 if requirements.NARROW_REQUIREMENT in repo.requirements:
2223 # Versions 01 and 02 don't support revlog flags, and we need to
2222 # Versions 01 and 02 don't support revlog flags, and we need to
2224 # support that for stripping and unbundling to work.
2223 # support that for stripping and unbundling to work.
2225 versions.discard(b'01')
2224 versions.discard(b'01')
2226 versions.discard(b'02')
2225 versions.discard(b'02')
2227 if LFS_REQUIREMENT in repo.requirements:
2226 if LFS_REQUIREMENT in repo.requirements:
2228 # Versions 01 and 02 don't support revlog flags, and we need to
2227 # Versions 01 and 02 don't support revlog flags, and we need to
2229 # mark LFS entries with REVIDX_EXTSTORED.
2228 # mark LFS entries with REVIDX_EXTSTORED.
2230 versions.discard(b'01')
2229 versions.discard(b'01')
2231 versions.discard(b'02')
2230 versions.discard(b'02')
2232
2231
2233 return versions
2232 return versions
2234
2233
2235
2234
2236 def localversion(repo):
2235 def localversion(repo):
2237 # Finds the best version to use for bundles that are meant to be used
2236 # Finds the best version to use for bundles that are meant to be used
2238 # locally, such as those from strip and shelve, and temporary bundles.
2237 # locally, such as those from strip and shelve, and temporary bundles.
2239 return max(supportedoutgoingversions(repo))
2238 return max(supportedoutgoingversions(repo))
2240
2239
2241
2240
2242 def safeversion(repo):
2241 def safeversion(repo):
2243 # Finds the smallest version that it's safe to assume clients of the repo
2242 # Finds the smallest version that it's safe to assume clients of the repo
2244 # will support. For example, all hg versions that support generaldelta also
2243 # will support. For example, all hg versions that support generaldelta also
2245 # support changegroup 02.
2244 # support changegroup 02.
2246 versions = supportedoutgoingversions(repo)
2245 versions = supportedoutgoingversions(repo)
2247 if requirements.GENERALDELTA_REQUIREMENT in repo.requirements:
2246 if requirements.GENERALDELTA_REQUIREMENT in repo.requirements:
2248 versions.discard(b'01')
2247 versions.discard(b'01')
2249 assert versions
2248 assert versions
2250 return min(versions)
2249 return min(versions)
2251
2250
2252
2251
2253 def getbundler(
2252 def getbundler(
2254 version,
2253 version,
2255 repo,
2254 repo,
2256 bundlecaps=None,
2255 bundlecaps=None,
2257 oldmatcher=None,
2256 oldmatcher=None,
2258 matcher=None,
2257 matcher=None,
2259 ellipses=False,
2258 ellipses=False,
2260 shallow=False,
2259 shallow=False,
2261 ellipsisroots=None,
2260 ellipsisroots=None,
2262 fullnodes=None,
2261 fullnodes=None,
2263 remote_sidedata=None,
2262 remote_sidedata=None,
2264 ):
2263 ):
2265 assert version in supportedoutgoingversions(repo)
2264 assert version in supportedoutgoingversions(repo)
2266
2265
2267 if matcher is None:
2266 if matcher is None:
2268 matcher = matchmod.always()
2267 matcher = matchmod.always()
2269 if oldmatcher is None:
2268 if oldmatcher is None:
2270 oldmatcher = matchmod.never()
2269 oldmatcher = matchmod.never()
2271
2270
2272 if version == b'01' and not matcher.always():
2271 if version == b'01' and not matcher.always():
2273 raise error.ProgrammingError(
2272 raise error.ProgrammingError(
2274 b'version 01 changegroups do not support sparse file matchers'
2273 b'version 01 changegroups do not support sparse file matchers'
2275 )
2274 )
2276
2275
2277 if ellipses and version in (b'01', b'02'):
2276 if ellipses and version in (b'01', b'02'):
2278 raise error.Abort(
2277 raise error.Abort(
2279 _(
2278 _(
2280 b'ellipsis nodes require at least cg3 on client and server, '
2279 b'ellipsis nodes require at least cg3 on client and server, '
2281 b'but negotiated version %s'
2280 b'but negotiated version %s'
2282 )
2281 )
2283 % version
2282 % version
2284 )
2283 )
2285
2284
2286 # Requested files could include files not in the local store. So
2285 # Requested files could include files not in the local store. So
2287 # filter those out.
2286 # filter those out.
2288 matcher = repo.narrowmatch(matcher)
2287 matcher = repo.narrowmatch(matcher)
2289
2288
2290 fn = _packermap[version][0]
2289 fn = _packermap[version][0]
2291 return fn(
2290 return fn(
2292 repo,
2291 repo,
2293 oldmatcher,
2292 oldmatcher,
2294 matcher,
2293 matcher,
2295 bundlecaps,
2294 bundlecaps,
2296 ellipses=ellipses,
2295 ellipses=ellipses,
2297 shallow=shallow,
2296 shallow=shallow,
2298 ellipsisroots=ellipsisroots,
2297 ellipsisroots=ellipsisroots,
2299 fullnodes=fullnodes,
2298 fullnodes=fullnodes,
2300 remote_sidedata=remote_sidedata,
2299 remote_sidedata=remote_sidedata,
2301 )
2300 )
2302
2301
2303
2302
2304 def getunbundler(version, fh, alg, extras=None):
2303 def getunbundler(version, fh, alg, extras=None):
2305 return _packermap[version][1](fh, alg, extras=extras)
2304 return _packermap[version][1](fh, alg, extras=extras)
2306
2305
2307
2306
2308 def _changegroupinfo(repo, nodes, source):
2307 def _changegroupinfo(repo, nodes, source):
2309 if repo.ui.verbose or source == b'bundle':
2308 if repo.ui.verbose or source == b'bundle':
2310 repo.ui.status(_(b"%d changesets found\n") % len(nodes))
2309 repo.ui.status(_(b"%d changesets found\n") % len(nodes))
2311 if repo.ui.debugflag:
2310 if repo.ui.debugflag:
2312 repo.ui.debug(b"list of changesets:\n")
2311 repo.ui.debug(b"list of changesets:\n")
2313 for node in nodes:
2312 for node in nodes:
2314 repo.ui.debug(b"%s\n" % hex(node))
2313 repo.ui.debug(b"%s\n" % hex(node))
2315
2314
2316
2315
2317 def makechangegroup(
2316 def makechangegroup(
2318 repo,
2317 repo,
2319 outgoing,
2318 outgoing,
2320 version,
2319 version,
2321 source,
2320 source,
2322 fastpath=False,
2321 fastpath=False,
2323 bundlecaps=None,
2322 bundlecaps=None,
2324 ):
2323 ):
2325 cgstream = makestream(
2324 cgstream = makestream(
2326 repo,
2325 repo,
2327 outgoing,
2326 outgoing,
2328 version,
2327 version,
2329 source,
2328 source,
2330 fastpath=fastpath,
2329 fastpath=fastpath,
2331 bundlecaps=bundlecaps,
2330 bundlecaps=bundlecaps,
2332 )
2331 )
2333 return getunbundler(
2332 return getunbundler(
2334 version,
2333 version,
2335 util.chunkbuffer(cgstream),
2334 util.chunkbuffer(cgstream),
2336 None,
2335 None,
2337 {b'clcount': len(outgoing.missing)},
2336 {b'clcount': len(outgoing.missing)},
2338 )
2337 )
2339
2338
2340
2339
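# Editor's note (illustrative sketch, not part of the original module): how the
# helpers above fit together, assuming `repo` and an `outgoing` object exposing
# the attributes makestream() reads below (common, missing, ancestorsof):
#
#   version = safeversion(repo)                 # smallest broadly-supported version
#   cg = makechangegroup(repo, outgoing, version, b'push')
#
# makechangegroup() is makestream() wrapped in getunbundler(), so it returns an
# unbundler object, while makestream() alone yields the raw chunk stream.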
2341 def makestream(
2340 def makestream(
2342 repo,
2341 repo,
2343 outgoing,
2342 outgoing,
2344 version,
2343 version,
2345 source,
2344 source,
2346 fastpath=False,
2345 fastpath=False,
2347 bundlecaps=None,
2346 bundlecaps=None,
2348 matcher=None,
2347 matcher=None,
2349 remote_sidedata=None,
2348 remote_sidedata=None,
2350 ):
2349 ):
2351 bundler = getbundler(
2350 bundler = getbundler(
2352 version,
2351 version,
2353 repo,
2352 repo,
2354 bundlecaps=bundlecaps,
2353 bundlecaps=bundlecaps,
2355 matcher=matcher,
2354 matcher=matcher,
2356 remote_sidedata=remote_sidedata,
2355 remote_sidedata=remote_sidedata,
2357 )
2356 )
2358
2357
2359 repo = repo.unfiltered()
2358 repo = repo.unfiltered()
2360 commonrevs = outgoing.common
2359 commonrevs = outgoing.common
2361 csets = outgoing.missing
2360 csets = outgoing.missing
2362 heads = outgoing.ancestorsof
2361 heads = outgoing.ancestorsof
2363 # We go through the fast path if we get told to, or if all (unfiltered)
2362 # We go through the fast path if we get told to, or if all (unfiltered)
2364 # heads have been requested (since we then know that all linkrevs will
2363 # heads have been requested (since we then know that all linkrevs will
2365 # be pulled by the client).
2364 # be pulled by the client).
2366 heads.sort()
2365 heads.sort()
2367 fastpathlinkrev = fastpath or (
2366 fastpathlinkrev = fastpath or (
2368 repo.filtername is None and heads == sorted(repo.heads())
2367 repo.filtername is None and heads == sorted(repo.heads())
2369 )
2368 )
2370
2369
2371 repo.hook(b'preoutgoing', throw=True, source=source)
2370 repo.hook(b'preoutgoing', throw=True, source=source)
2372 _changegroupinfo(repo, csets, source)
2371 _changegroupinfo(repo, csets, source)
2373 return bundler.generate(
2372 return bundler.generate(
2374 commonrevs,
2373 commonrevs,
2375 csets,
2374 csets,
2376 fastpathlinkrev,
2375 fastpathlinkrev,
2377 source,
2376 source,
2378 )
2377 )
2379
2378
2380
2379
2381 def _addchangegroupfiles(
2380 def _addchangegroupfiles(
2382 repo,
2381 repo,
2383 source,
2382 source,
2384 revmap,
2383 revmap,
2385 trp,
2384 trp,
2386 expectedfiles,
2385 expectedfiles,
2387 needfiles,
2386 needfiles,
2388 addrevisioncb=None,
2387 addrevisioncb=None,
2389 debug_info=None,
2388 debug_info=None,
2390 delta_base_reuse_policy=None,
2389 delta_base_reuse_policy=None,
2391 ):
2390 ):
2392 revisions = 0
2391 revisions = 0
2393 files = 0
2392 files = 0
2394 progress = repo.ui.makeprogress(
2393 progress = repo.ui.makeprogress(
2395 _(b'files'), unit=_(b'files'), total=expectedfiles
2394 _(b'files'), unit=_(b'files'), total=expectedfiles
2396 )
2395 )
2397 for chunkdata in iter(source.filelogheader, {}):
2396 for chunkdata in iter(source.filelogheader, {}):
2398 files += 1
2397 files += 1
2399 f = chunkdata[b"filename"]
2398 f = chunkdata[b"filename"]
2400 repo.ui.debug(b"adding %s revisions\n" % f)
2399 repo.ui.debug(b"adding %s revisions\n" % f)
2401 progress.increment()
2400 progress.increment()
2402 fl = repo.file(f)
2401 fl = repo.file(f)
2403 o = len(fl)
2402 o = len(fl)
2404 try:
2403 try:
2405 deltas = source.deltaiter()
2404 deltas = source.deltaiter()
2406 added = fl.addgroup(
2405 added = fl.addgroup(
2407 deltas,
2406 deltas,
2408 revmap,
2407 revmap,
2409 trp,
2408 trp,
2410 addrevisioncb=addrevisioncb,
2409 addrevisioncb=addrevisioncb,
2411 debug_info=debug_info,
2410 debug_info=debug_info,
2412 delta_base_reuse_policy=delta_base_reuse_policy,
2411 delta_base_reuse_policy=delta_base_reuse_policy,
2413 )
2412 )
2414 if not added:
2413 if not added:
2415 raise error.Abort(_(b"received file revlog group is empty"))
2414 raise error.Abort(_(b"received file revlog group is empty"))
2416 except error.CensoredBaseError as e:
2415 except error.CensoredBaseError as e:
2417 raise error.Abort(_(b"received delta base is censored: %s") % e)
2416 raise error.Abort(_(b"received delta base is censored: %s") % e)
2418 revisions += len(fl) - o
2417 revisions += len(fl) - o
2419 if f in needfiles:
2418 if f in needfiles:
2420 needs = needfiles[f]
2419 needs = needfiles[f]
2421 for new in range(o, len(fl)):
2420 for new in range(o, len(fl)):
2422 n = fl.node(new)
2421 n = fl.node(new)
2423 if n in needs:
2422 if n in needs:
2424 needs.remove(n)
2423 needs.remove(n)
2425 else:
2424 else:
2426 raise error.Abort(_(b"received spurious file revlog entry"))
2425 raise error.Abort(_(b"received spurious file revlog entry"))
2427 if not needs:
2426 if not needs:
2428 del needfiles[f]
2427 del needfiles[f]
2429 progress.complete()
2428 progress.complete()
2430
2429
2431 for f, needs in needfiles.items():
2430 for f, needs in needfiles.items():
2432 fl = repo.file(f)
2431 fl = repo.file(f)
2433 for n in needs:
2432 for n in needs:
2434 try:
2433 try:
2435 fl.rev(n)
2434 fl.rev(n)
2436 except error.LookupError:
2435 except error.LookupError:
2437 raise error.Abort(
2436 raise error.Abort(
2438 _(b'missing file data for %s:%s - run hg verify')
2437 _(b'missing file data for %s:%s - run hg verify')
2439 % (f, hex(n))
2438 % (f, hex(n))
2440 )
2439 )
2441
2440
2442 return revisions, files
2441 return revisions, files
@@ -1,1110 +1,1141 b''
1 # dagop.py - graph ancestry and topology algorithm for revset
1 # dagop.py - graph ancestry and topology algorithm for revset
2 #
2 #
3 # Copyright 2010 Olivia Mackall <olivia@selenic.com>
3 # Copyright 2010 Olivia Mackall <olivia@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8
8
9 import heapq
9 import heapq
10
10
11 from .thirdparty import attr
11 from .thirdparty import attr
12 from .node import nullrev
12 from .node import nullrev
13 from . import (
13 from . import (
14 error,
14 error,
15 mdiff,
15 mdiff,
16 patch,
16 patch,
17 pycompat,
17 pycompat,
18 scmutil,
18 scmutil,
19 smartset,
19 smartset,
20 )
20 )
21
21
22 baseset = smartset.baseset
22 baseset = smartset.baseset
23 generatorset = smartset.generatorset
23 generatorset = smartset.generatorset
24
24
25 # possible maximum depth between null and wdir()
25 # possible maximum depth between null and wdir()
26 maxlogdepth = 0x80000000
26 maxlogdepth = 0x80000000
27
27
28
28
29 def _walkrevtree(pfunc, revs, startdepth, stopdepth, reverse):
29 def _walkrevtree(pfunc, revs, startdepth, stopdepth, reverse):
30 """Walk DAG using 'pfunc' from the given 'revs' nodes
30 """Walk DAG using 'pfunc' from the given 'revs' nodes
31
31
32 'pfunc(rev)' should return the parent revisions of the given 'rev' if
32 'pfunc(rev)' should return the parent revisions of the given 'rev' if
33 'reverse' is True, or its child revisions if 'reverse' is False.
33 'reverse' is True, or its child revisions if 'reverse' is False.
34
34
35 Scan ends at the stopdepth (exclusive) if specified. Revisions found
35 Scan ends at the stopdepth (exclusive) if specified. Revisions found
36 earlier than the startdepth are omitted.
36 earlier than the startdepth are omitted.
37 """
37 """
38 if startdepth is None:
38 if startdepth is None:
39 startdepth = 0
39 startdepth = 0
40 if stopdepth is None:
40 if stopdepth is None:
41 stopdepth = maxlogdepth
41 stopdepth = maxlogdepth
42 if stopdepth == 0:
42 if stopdepth == 0:
43 return
43 return
44 if stopdepth < 0:
44 if stopdepth < 0:
45 raise error.ProgrammingError(b'negative stopdepth')
45 raise error.ProgrammingError(b'negative stopdepth')
46 if reverse:
46 if reverse:
47 heapsign = -1 # max heap
47 heapsign = -1 # max heap
48 else:
48 else:
49 heapsign = +1 # min heap
49 heapsign = +1 # min heap
50
50
51 # load input revs lazily to heap so earlier revisions can be yielded
51 # load input revs lazily to heap so earlier revisions can be yielded
52 # without fully computing the input revs
52 # without fully computing the input revs
53 revs.sort(reverse)
53 revs.sort(reverse)
54 irevs = iter(revs)
54 irevs = iter(revs)
55 pendingheap = [] # [(heapsign * rev, depth), ...] (i.e. lower depth first)
55 pendingheap = [] # [(heapsign * rev, depth), ...] (i.e. lower depth first)
56
56
57 inputrev = next(irevs, None)
57 inputrev = next(irevs, None)
58 if inputrev is not None:
58 if inputrev is not None:
59 heapq.heappush(pendingheap, (heapsign * inputrev, 0))
59 heapq.heappush(pendingheap, (heapsign * inputrev, 0))
60
60
61 lastrev = None
61 lastrev = None
62 while pendingheap:
62 while pendingheap:
63 currev, curdepth = heapq.heappop(pendingheap)
63 currev, curdepth = heapq.heappop(pendingheap)
64 currev = heapsign * currev
64 currev = heapsign * currev
65 if currev == inputrev:
65 if currev == inputrev:
66 inputrev = next(irevs, None)
66 inputrev = next(irevs, None)
67 if inputrev is not None:
67 if inputrev is not None:
68 heapq.heappush(pendingheap, (heapsign * inputrev, 0))
68 heapq.heappush(pendingheap, (heapsign * inputrev, 0))
69 # rescan parents until curdepth >= startdepth because queued entries
69 # rescan parents until curdepth >= startdepth because queued entries
70 # of the same revision are iterated from the lowest depth
70 # of the same revision are iterated from the lowest depth
71 foundnew = currev != lastrev
71 foundnew = currev != lastrev
72 if foundnew and curdepth >= startdepth:
72 if foundnew and curdepth >= startdepth:
73 lastrev = currev
73 lastrev = currev
74 yield currev
74 yield currev
75 pdepth = curdepth + 1
75 pdepth = curdepth + 1
76 if foundnew and pdepth < stopdepth:
76 if foundnew and pdepth < stopdepth:
77 for prev in pfunc(currev):
77 for prev in pfunc(currev):
78 if prev != nullrev:
78 if prev != nullrev:
79 heapq.heappush(pendingheap, (heapsign * prev, pdepth))
79 heapq.heappush(pendingheap, (heapsign * prev, pdepth))
80
80
81
81
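# Editor's illustration (not part of the original module): a tiny,
# self-contained sketch of the idea behind _walkrevtree() above -- walk a DAG
# through a heap, yield each revision once, and stop at stopdepth (exclusive).
# It reuses the heapq import at the top of this module; the toy parent table
# and names are hypothetical.
def _toy_ancestors(parents, start, stopdepth):
    seen = set()
    heap = [(-start, 0)]  # negate revs for a max-heap, as the real walker does for ancestors
    while heap:
        negrev, depth = heapq.heappop(heap)
        rev = -negrev
        if rev in seen:
            continue
        seen.add(rev)
        yield rev
        if depth + 1 < stopdepth:
            for p in parents.get(rev, ()):
                heapq.heappush(heap, (-p, depth + 1))

# e.g. with rev 4 merging 3 and 2, both children of 1, which is a child of root 0:
_toy_parents = {4: (3, 2), 3: (1,), 2: (1,), 1: (0,), 0: ()}
assert list(_toy_ancestors(_toy_parents, 4, stopdepth=2)) == [4, 3, 2]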
82 def filectxancestors(fctxs, followfirst=False):
82 def filectxancestors(fctxs, followfirst=False):
83 """Like filectx.ancestors(), but can walk from multiple files/revisions,
83 """Like filectx.ancestors(), but can walk from multiple files/revisions,
84 and includes the given fctxs themselves
84 and includes the given fctxs themselves
85
85
86 Yields (rev, {fctx, ...}) pairs in descending order.
86 Yields (rev, {fctx, ...}) pairs in descending order.
87 """
87 """
88 visit = {}
88 visit = {}
89 visitheap = []
89 visitheap = []
90
90
91 def addvisit(fctx):
91 def addvisit(fctx):
92 rev = scmutil.intrev(fctx)
92 rev = scmutil.intrev(fctx)
93 if rev not in visit:
93 if rev not in visit:
94 visit[rev] = set()
94 visit[rev] = set()
95 heapq.heappush(visitheap, -rev) # max heap
95 heapq.heappush(visitheap, -rev) # max heap
96 visit[rev].add(fctx)
96 visit[rev].add(fctx)
97
97
98 if followfirst:
98 if followfirst:
99 cut = 1
99 cut = 1
100 else:
100 else:
101 cut = None
101 cut = None
102
102
103 for c in fctxs:
103 for c in fctxs:
104 addvisit(c)
104 addvisit(c)
105 while visit:
105 while visit:
106 currev = -(heapq.heappop(visitheap))
106 currev = -(heapq.heappop(visitheap))
107 curfctxs = visit.pop(currev)
107 curfctxs = visit.pop(currev)
108 yield currev, curfctxs
108 yield currev, curfctxs
109 for c in curfctxs:
109 for c in curfctxs:
110 for parent in c.parents()[:cut]:
110 for parent in c.parents()[:cut]:
111 addvisit(parent)
111 addvisit(parent)
112 assert not visitheap
112 assert not visitheap
113
113
114
114
115 def filerevancestors(fctxs, followfirst=False):
115 def filerevancestors(fctxs, followfirst=False):
116 """Like filectx.ancestors(), but can walk from multiple files/revisions,
116 """Like filectx.ancestors(), but can walk from multiple files/revisions,
117 and includes the given fctxs themselves
117 and includes the given fctxs themselves
118
118
119 Returns a smartset.
119 Returns a smartset.
120 """
120 """
121 gen = (rev for rev, _cs in filectxancestors(fctxs, followfirst))
121 gen = (rev for rev, _cs in filectxancestors(fctxs, followfirst))
122 return generatorset(gen, iterasc=False)
122 return generatorset(gen, iterasc=False)
123
123
124
124
125 def _genrevancestors(repo, revs, followfirst, startdepth, stopdepth, cutfunc):
125 def _genrevancestors(repo, revs, followfirst, startdepth, stopdepth, cutfunc):
126 if followfirst:
126 if followfirst:
127 cut = 1
127 cut = 1
128 else:
128 else:
129 cut = None
129 cut = None
130 cl = repo.changelog
130 cl = repo.changelog
131
131
132 def plainpfunc(rev):
132 def plainpfunc(rev):
133 try:
133 try:
134 return cl.parentrevs(rev)[:cut]
134 return cl.parentrevs(rev)[:cut]
135 except error.WdirUnsupported:
135 except error.WdirUnsupported:
136 return (pctx.rev() for pctx in repo[rev].parents()[:cut])
136 return (pctx.rev() for pctx in repo[rev].parents()[:cut])
137
137
138 if cutfunc is None:
138 if cutfunc is None:
139 pfunc = plainpfunc
139 pfunc = plainpfunc
140 else:
140 else:
141 pfunc = lambda rev: [r for r in plainpfunc(rev) if not cutfunc(r)]
141 pfunc = lambda rev: [r for r in plainpfunc(rev) if not cutfunc(r)]
142 revs = revs.filter(lambda rev: not cutfunc(rev))
142 revs = revs.filter(lambda rev: not cutfunc(rev))
143 return _walkrevtree(pfunc, revs, startdepth, stopdepth, reverse=True)
143 return _walkrevtree(pfunc, revs, startdepth, stopdepth, reverse=True)
144
144
145
145
146 def revancestors(
146 def revancestors(
147 repo, revs, followfirst=False, startdepth=None, stopdepth=None, cutfunc=None
147 repo, revs, followfirst=False, startdepth=None, stopdepth=None, cutfunc=None
148 ):
148 ):
149 r"""Like revlog.ancestors(), but supports additional options, includes
149 r"""Like revlog.ancestors(), but supports additional options, includes
150 the given revs themselves, and returns a smartset
150 the given revs themselves, and returns a smartset
151
151
152 Scan ends at the stopdepth (exclusive) if specified. Revisions found
152 Scan ends at the stopdepth (exclusive) if specified. Revisions found
153 earlier than the startdepth are omitted.
153 earlier than the startdepth are omitted.
154
154
155 If cutfunc is provided, it will be used to cut the traversal of the DAG.
155 If cutfunc is provided, it will be used to cut the traversal of the DAG.
156 When cutfunc(X) returns True, the DAG traversal stops - revision X and
156 When cutfunc(X) returns True, the DAG traversal stops - revision X and
157 X's ancestors in the traversal path will be skipped. This could be an
157 X's ancestors in the traversal path will be skipped. This could be an
158 optimization sometimes.
158 optimization sometimes.
159
159
160 Note: if Y is an ancestor of X, cutfunc(X) returning True does not
160 Note: if Y is an ancestor of X, cutfunc(X) returning True does not
161 necessarily mean Y will also be cut. Usually cutfunc(Y) also wants to
161 necessarily mean Y will also be cut. Usually cutfunc(Y) also wants to
162 return True in this case. For example,
162 return True in this case. For example,
163
163
164 D # revancestors(repo, D, cutfunc=lambda rev: rev == B)
164 D # revancestors(repo, D, cutfunc=lambda rev: rev == B)
165 |\ # will include "A", because the path D -> C -> A was not cut.
165 |\ # will include "A", because the path D -> C -> A was not cut.
166 B C # If "B" gets cut, "A" might want to be cut too.
166 B C # If "B" gets cut, "A" might want to be cut too.
167 |/
167 |/
168 A
168 A
169 """
169 """
170 gen = _genrevancestors(
170 gen = _genrevancestors(
171 repo, revs, followfirst, startdepth, stopdepth, cutfunc
171 repo, revs, followfirst, startdepth, stopdepth, cutfunc
172 )
172 )
173 return generatorset(gen, iterasc=False)
173 return generatorset(gen, iterasc=False)
174
174
175
175
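# Editor's illustration (not part of the original module): the cutfunc
# behaviour described in the revancestors() docstring, on the same toy graph
# (A=0, B=1, C=2, D=3; D merges B and C).  Plain-Python sketch with
# hypothetical names only.
toy_parents = {3: (1, 2), 2: (0,), 1: (0,), 0: ()}
cutfunc = lambda rev: rev == 1  # cut "B"

def toy_revancestors(start):
    stack, found = [start], set()
    while stack:
        rev = stack.pop()
        if rev in found or cutfunc(rev):
            continue  # a cut revision and the paths through it are skipped
        found.add(rev)
        stack.extend(toy_parents[rev])
    return found

assert toy_revancestors(3) == {3, 2, 0}  # "A" survives via the D -> C -> A path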
176 def _genrevdescendants(repo, revs, followfirst):
176 def _genrevdescendants(repo, revs, followfirst):
177 if followfirst:
177 if followfirst:
178 cut = 1
178 cut = 1
179 else:
179 else:
180 cut = None
180 cut = None
181
181
182 cl = repo.changelog
182 cl = repo.changelog
183 first = revs.min()
183 first = revs.min()
184 if first == nullrev:
184 if first == nullrev:
185 # Are there nodes with a null first parent and a non-null
185 # Are there nodes with a null first parent and a non-null
186 # second one? Maybe. Do we care? Probably not.
186 # second one? Maybe. Do we care? Probably not.
187 yield first
187 yield first
188 for i in cl:
188 for i in cl:
189 yield i
189 yield i
190 else:
190 else:
191 seen = set(revs)
191 seen = set(revs)
192 for i in cl.revs(first):
192 for i in cl.revs(first):
193 if i in seen:
193 if i in seen:
194 yield i
194 yield i
195 continue
195 continue
196 for x in cl.parentrevs(i)[:cut]:
196 for x in cl.parentrevs(i)[:cut]:
197 if x != nullrev and x in seen:
197 if x != nullrev and x in seen:
198 seen.add(i)
198 seen.add(i)
199 yield i
199 yield i
200 break
200 break
201
201
202
202
203 def _builddescendantsmap(repo, startrev, followfirst):
203 def _builddescendantsmap(repo, startrev, followfirst):
204 """Build map of 'rev -> child revs', offset from startrev"""
204 """Build map of 'rev -> child revs', offset from startrev"""
205 cl = repo.changelog
205 cl = repo.changelog
206 descmap = [[] for _rev in range(startrev, len(cl))]
206 descmap = [[] for _rev in range(startrev, len(cl))]
207 for currev in cl.revs(startrev + 1):
207 for currev in cl.revs(startrev + 1):
208 p1rev, p2rev = cl.parentrevs(currev)
208 p1rev, p2rev = cl.parentrevs(currev)
209 if p1rev >= startrev:
209 if p1rev >= startrev:
210 descmap[p1rev - startrev].append(currev)
210 descmap[p1rev - startrev].append(currev)
211 if not followfirst and p2rev != nullrev and p2rev >= startrev:
211 if not followfirst and p2rev != nullrev and p2rev >= startrev:
212 descmap[p2rev - startrev].append(currev)
212 descmap[p2rev - startrev].append(currev)
213 return descmap
213 return descmap
214
214
215
215
216 def _genrevdescendantsofdepth(repo, revs, followfirst, startdepth, stopdepth):
216 def _genrevdescendantsofdepth(repo, revs, followfirst, startdepth, stopdepth):
217 startrev = revs.min()
217 startrev = revs.min()
218 descmap = _builddescendantsmap(repo, startrev, followfirst)
218 descmap = _builddescendantsmap(repo, startrev, followfirst)
219
219
220 def pfunc(rev):
220 def pfunc(rev):
221 return descmap[rev - startrev]
221 return descmap[rev - startrev]
222
222
223 return _walkrevtree(pfunc, revs, startdepth, stopdepth, reverse=False)
223 return _walkrevtree(pfunc, revs, startdepth, stopdepth, reverse=False)
224
224
225
225
226 def revdescendants(repo, revs, followfirst, startdepth=None, stopdepth=None):
226 def revdescendants(repo, revs, followfirst, startdepth=None, stopdepth=None):
227 """Like revlog.descendants() but supports additional options, includes
227 """Like revlog.descendants() but supports additional options, includes
228 the given revs themselves, and returns a smartset
228 the given revs themselves, and returns a smartset
229
229
230 Scan ends at the stopdepth (exclusive) if specified. Revisions found
230 Scan ends at the stopdepth (exclusive) if specified. Revisions found
231 earlier than the startdepth are omitted.
231 earlier than the startdepth are omitted.
232 """
232 """
233 if startdepth is None and (stopdepth is None or stopdepth >= maxlogdepth):
233 if startdepth is None and (stopdepth is None or stopdepth >= maxlogdepth):
234 gen = _genrevdescendants(repo, revs, followfirst)
234 gen = _genrevdescendants(repo, revs, followfirst)
235 else:
235 else:
236 gen = _genrevdescendantsofdepth(
236 gen = _genrevdescendantsofdepth(
237 repo, revs, followfirst, startdepth, stopdepth
237 repo, revs, followfirst, startdepth, stopdepth
238 )
238 )
239 return generatorset(gen, iterasc=True)
239 return generatorset(gen, iterasc=True)
240
240
241
241
242 def descendantrevs(revs, revsfn, parentrevsfn):
242 def descendantrevs(revs, revsfn, parentrevsfn):
243 """Generate revision number descendants in revision order.
243 """Generate revision number descendants in revision order.
244
244
245 Yields revision numbers starting with a child of some rev in
245 Yields revision numbers starting with a child of some rev in
246 ``revs``. Results are ordered by revision number and are
246 ``revs``. Results are ordered by revision number and are
247 therefore topological. Each revision is not considered a descendant
247 therefore topological. Each revision is not considered a descendant
248 of itself.
248 of itself.
249
249
250 ``revsfn`` is a callable that with no argument iterates over all
250 ``revsfn`` is a callable that with no argument iterates over all
251 revision numbers and with a ``start`` argument iterates over revision
251 revision numbers and with a ``start`` argument iterates over revision
252 numbers beginning with that value.
252 numbers beginning with that value.
253
253
254 ``parentrevsfn`` is a callable that receives a revision number and
254 ``parentrevsfn`` is a callable that receives a revision number and
255 returns an iterable of parent revision numbers, whose values may include
255 returns an iterable of parent revision numbers, whose values may include
256 nullrev.
256 nullrev.
257 """
257 """
258 first = min(revs)
258 first = min(revs)
259
259
260 if first == nullrev:
260 if first == nullrev:
261 for rev in revsfn():
261 for rev in revsfn():
262 yield rev
262 yield rev
263 return
263 return
264
264
265 seen = set(revs)
265 seen = set(revs)
266 for rev in revsfn(start=first + 1):
266 for rev in revsfn(start=first + 1):
267 for prev in parentrevsfn(rev):
267 for prev in parentrevsfn(rev):
268 if prev != nullrev and prev in seen:
268 if prev != nullrev and prev in seen:
269 seen.add(rev)
269 seen.add(rev)
270 yield rev
270 yield rev
271 break
271 break
272
272
273
273
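# Editor's illustration (not part of the original module): a self-contained
# toy run of the seen-set scan used by descendantrevs() above, with a plain
# dict supplying parent revisions.  All names are hypothetical.
toy_child_parents = {0: (), 1: (0,), 2: (0,), 3: (1,), 4: (2,)}

def toy_descendants(revs):
    seen = set(revs)
    for rev in range(min(revs) + 1, len(toy_child_parents)):  # ascending revision order
        if any(p in seen for p in toy_child_parents[rev]):
            seen.add(rev)
            yield rev

assert list(toy_descendants({1})) == [3]  # 3 descends from 1; 2 and 4 do not
assert list(toy_descendants({0})) == [1, 2, 3, 4]  # everything descends from the root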
274 class subsetparentswalker:
274 class subsetparentswalker:
275 r"""Scan adjacent ancestors in the graph given by the subset
275 r"""Scan adjacent ancestors in the graph given by the subset
276
276
277 This computes parent-child relations in the subgraph filtered by
277 This computes parent-child relations in the subgraph filtered by
278 a revset. The primary use case is to draw a revision graph.
278 a revset. The primary use case is to draw a revision graph.
279
279
280 In the following example, we consider that the node 'f' has edges to all
280 In the following example, we consider that the node 'f' has edges to all
281 ancestor nodes, but redundant paths are eliminated. The edge 'f'->'b'
281 ancestor nodes, but redundant paths are eliminated. The edge 'f'->'b'
282 is eliminated because there is a path 'f'->'c'->'b' for example.
282 is eliminated because there is a path 'f'->'c'->'b' for example.
283
283
284 - d - e -
284 - d - e -
285 / \
285 / \
286 a - b - c - f
286 a - b - c - f
287
287
288 If the node 'c' is filtered out, the edge 'f'->'b' is activated.
288 If the node 'c' is filtered out, the edge 'f'->'b' is activated.
289
289
290 - d - e -
290 - d - e -
291 / \
291 / \
292 a - b -(c)- f
292 a - b -(c)- f
293
293
294 Likewise, if 'd' and 'e' are filtered out, this edge is fully eliminated
294 Likewise, if 'd' and 'e' are filtered out, this edge is fully eliminated
295 since there is a path 'f'->'c'->'b'->'a' for 'f'->'a'.
295 since there is a path 'f'->'c'->'b'->'a' for 'f'->'a'.
296
296
297 (d) (e)
297 (d) (e)
298
298
299 a - b - c - f
299 a - b - c - f
300
300
301 Implementation-wise, 'f' is passed down to 'a' as unresolved through the
301 Implementation-wise, 'f' is passed down to 'a' as unresolved through the
302 'f'->'e'->'d'->'a' path, whereas we also remember that 'f' has already
302 'f'->'e'->'d'->'a' path, whereas we also remember that 'f' has already
303 been resolved while walking down the 'f'->'c'->'b'->'a' path. When
303 been resolved while walking down the 'f'->'c'->'b'->'a' path. When
304 processing the node 'a', the unresolved 'f'->'a' path is eliminated as
304 processing the node 'a', the unresolved 'f'->'a' path is eliminated as
305 the 'f' end is marked as resolved.
305 the 'f' end is marked as resolved.
306
306
307 Ancestors are searched from the tipmost revision in the subset so the
307 Ancestors are searched from the tipmost revision in the subset so the
308 results can be cached. You should specify startrev to narrow the search
308 results can be cached. You should specify startrev to narrow the search
309 space to ':startrev'.
309 space to ':startrev'.
310 """
310 """
311
311
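# Editor's note (illustrative sketch, not part of the original class): typical
# use is to build the walker once over the revset being graphed and then query
# each node; `repo` and `subset` are assumed to exist:
#
#   walker = subsetparentswalker(repo, subset)  # optionally pass startrev=...
#   for rev in subset:
#       adjusted_parents = walker.parents(rev)  # parents within the subset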
312 def __init__(self, repo, subset, startrev=None):
312 def __init__(self, repo, subset, startrev=None):
313 if startrev is not None:
313 if startrev is not None:
314 subset = repo.revs(b'%d:null', startrev) & subset
314 subset = repo.revs(b'%d:null', startrev) & subset
315
315
316 # equivalent to 'subset = subset.sorted(reverse=True)', but there's
316 # equivalent to 'subset = subset.sorted(reverse=True)', but there's
317 # no such function.
317 # no such function.
318 fastdesc = subset.fastdesc
318 fastdesc = subset.fastdesc
319 if fastdesc:
319 if fastdesc:
320 desciter = fastdesc()
320 desciter = fastdesc()
321 else:
321 else:
322 if not subset.isdescending() and not subset.istopo():
322 if not subset.isdescending() and not subset.istopo():
323 subset = smartset.baseset(subset)
323 subset = smartset.baseset(subset)
324 subset.sort(reverse=True)
324 subset.sort(reverse=True)
325 desciter = iter(subset)
325 desciter = iter(subset)
326
326
327 self._repo = repo
327 self._repo = repo
328 self._changelog = repo.changelog
328 self._changelog = repo.changelog
329 self._subset = subset
329 self._subset = subset
330
330
331 # scanning state (see _scanparents):
331 # scanning state (see _scanparents):
332 self._tovisit = []
332 self._tovisit = []
333 self._pendingcnt = {}
333 self._pendingcnt = {}
334 self._pointers = {}
334 self._pointers = {}
335 self._parents = {}
335 self._parents = {}
336 self._inputhead = nullrev # reassigned by self._advanceinput()
336 self._inputhead = nullrev # reassigned by self._advanceinput()
337 self._inputtail = desciter
337 self._inputtail = desciter
338 self._bottomrev = nullrev
338 self._bottomrev = nullrev
339 self._advanceinput()
339 self._advanceinput()
340
340
341 def parentsset(self, rev):
341 def parentsset(self, rev):
342 """Look up parents of the given revision in the subset, and returns
342 """Look up parents of the given revision in the subset, and returns
343 as a smartset"""
343 as a smartset"""
344 return smartset.baseset(self.parents(rev))
344 return smartset.baseset(self.parents(rev))
345
345
346 def parents(self, rev):
346 def parents(self, rev):
347 """Look up parents of the given revision in the subset
347 """Look up parents of the given revision in the subset
348
348
349 The returned revisions are sorted by parent index (p1/p2).
349 The returned revisions are sorted by parent index (p1/p2).
350 """
350 """
351 self._scanparents(rev)
351 self._scanparents(rev)
352 return [r for _c, r in sorted(self._parents.get(rev, []))]
352 return [r for _c, r in sorted(self._parents.get(rev, []))]
353
353
354 def _parentrevs(self, rev):
354 def _parentrevs(self, rev):
355 try:
355 try:
356 revs = self._changelog.parentrevs(rev)
356 revs = self._changelog.parentrevs(rev)
357 if revs[-1] == nullrev:
357 if revs[-1] == nullrev:
358 return revs[:-1]
358 return revs[:-1]
359 return revs
359 return revs
360 except error.WdirUnsupported:
360 except error.WdirUnsupported:
361 return tuple(pctx.rev() for pctx in self._repo[None].parents())
361 return tuple(pctx.rev() for pctx in self._repo[None].parents())
362
362
363 def _advanceinput(self):
363 def _advanceinput(self):
364 """Advance the input iterator and set the next revision to _inputhead"""
364 """Advance the input iterator and set the next revision to _inputhead"""
365 if self._inputhead < nullrev:
365 if self._inputhead < nullrev:
366 return
366 return
367 try:
367 try:
368 self._inputhead = next(self._inputtail)
368 self._inputhead = next(self._inputtail)
369 except StopIteration:
369 except StopIteration:
370 self._bottomrev = self._inputhead
370 self._bottomrev = self._inputhead
371 self._inputhead = nullrev - 1
371 self._inputhead = nullrev - 1
372
372
373 def _scanparents(self, stoprev):
373 def _scanparents(self, stoprev):
374 """Scan ancestors until the parents of the specified stoprev are
374 """Scan ancestors until the parents of the specified stoprev are
375 resolved"""
375 resolved"""
376
376
377 # 'tovisit' is the queue of the input revisions and their ancestors.
377 # 'tovisit' is the queue of the input revisions and their ancestors.
378 # It will be populated incrementally to minimize the initial cost
378 # It will be populated incrementally to minimize the initial cost
379 # of computing the given subset.
379 # of computing the given subset.
380 #
380 #
381 # For to-visit revisions, we keep track of
381 # For to-visit revisions, we keep track of
382 # - the number of the unresolved paths: pendingcnt[rev],
382 # - the number of the unresolved paths: pendingcnt[rev],
383 # - dict of the unresolved descendants and chains: pointers[rev][0],
383 # - dict of the unresolved descendants and chains: pointers[rev][0],
384 # - set of the already resolved descendants: pointers[rev][1].
384 # - set of the already resolved descendants: pointers[rev][1].
385 #
385 #
386 # When a revision is visited, 'pointers[rev]' should be popped and
386 # When a revision is visited, 'pointers[rev]' should be popped and
387 # propagated to its parents accordingly.
387 # propagated to its parents accordingly.
388 #
388 #
389 # Once all pending paths have been resolved, 'pendingcnt[rev]' becomes
389 # Once all pending paths have been resolved, 'pendingcnt[rev]' becomes
390 # 0 and 'parents[rev]' contains the unsorted list of parent revisions
390 # 0 and 'parents[rev]' contains the unsorted list of parent revisions
391 # and p1/p2 chains (excluding linear paths.) The p1/p2 chains will be
391 # and p1/p2 chains (excluding linear paths.) The p1/p2 chains will be
392 # used as a sort key preferring p1. 'len(chain)' should be the number
392 # used as a sort key preferring p1. 'len(chain)' should be the number
393 # of merges between two revisions.
393 # of merges between two revisions.
394
394
395 subset = self._subset
395 subset = self._subset
396 tovisit = self._tovisit # heap queue of [-rev]
396 tovisit = self._tovisit # heap queue of [-rev]
397 pendingcnt = self._pendingcnt # {rev: count} for visited revisions
397 pendingcnt = self._pendingcnt # {rev: count} for visited revisions
398 pointers = self._pointers # {rev: [{unresolved_rev: chain}, resolved]}
398 pointers = self._pointers # {rev: [{unresolved_rev: chain}, resolved]}
399 parents = self._parents # {rev: [(chain, rev)]}
399 parents = self._parents # {rev: [(chain, rev)]}
400
400
401 while tovisit or self._inputhead >= nullrev:
401 while tovisit or self._inputhead >= nullrev:
402 if pendingcnt.get(stoprev) == 0:
402 if pendingcnt.get(stoprev) == 0:
403 return
403 return
404
404
405 # feed greater revisions from input set to queue
405 # feed greater revisions from input set to queue
406 if not tovisit:
406 if not tovisit:
407 heapq.heappush(tovisit, -self._inputhead)
407 heapq.heappush(tovisit, -self._inputhead)
408 self._advanceinput()
408 self._advanceinput()
409 while self._inputhead >= -tovisit[0]:
409 while self._inputhead >= -tovisit[0]:
410 heapq.heappush(tovisit, -self._inputhead)
410 heapq.heappush(tovisit, -self._inputhead)
411 self._advanceinput()
411 self._advanceinput()
412
412
413 rev = -heapq.heappop(tovisit)
413 rev = -heapq.heappop(tovisit)
414 if rev < self._bottomrev:
414 if rev < self._bottomrev:
415 return
415 return
416 if rev in pendingcnt and rev not in pointers:
416 if rev in pendingcnt and rev not in pointers:
417 continue # already visited
417 continue # already visited
418
418
419 curactive = rev in subset
419 curactive = rev in subset
420 pendingcnt.setdefault(rev, 0) # mark as visited
420 pendingcnt.setdefault(rev, 0) # mark as visited
421 if curactive:
421 if curactive:
422 assert rev not in parents
422 assert rev not in parents
423 parents[rev] = []
423 parents[rev] = []
424 unresolved, resolved = pointers.pop(rev, ({}, set()))
424 unresolved, resolved = pointers.pop(rev, ({}, set()))
425
425
426 if curactive:
426 if curactive:
427 # reached an active rev, resolve pending descendants' parents
427 # reached an active rev, resolve pending descendants' parents
428 for r, c in unresolved.items():
428 for r, c in unresolved.items():
429 pendingcnt[r] -= 1
429 pendingcnt[r] -= 1
430 assert pendingcnt[r] >= 0
430 assert pendingcnt[r] >= 0
431 if r in resolved:
431 if r in resolved:
432 continue # eliminate redundant path
432 continue # eliminate redundant path
433 parents[r].append((c, rev))
433 parents[r].append((c, rev))
434 # mark the descendant 'r' as resolved through this path if
434 # mark the descendant 'r' as resolved through this path if
435 # there are still pending pointers. the 'resolved' set may
435 # there are still pending pointers. the 'resolved' set may
436 # be concatenated later at a fork revision.
436 # be concatenated later at a fork revision.
437 if pendingcnt[r] > 0:
437 if pendingcnt[r] > 0:
438 resolved.add(r)
438 resolved.add(r)
439 unresolved.clear()
439 unresolved.clear()
440 # occasionally clean resolved markers. otherwise the set
440 # occasionally clean resolved markers. otherwise the set
441 # would grow indefinitely.
441 # would grow indefinitely.
442 resolved = {r for r in resolved if pendingcnt[r] > 0}
442 resolved = {r for r in resolved if pendingcnt[r] > 0}
443
443
444 parentrevs = self._parentrevs(rev)
444 parentrevs = self._parentrevs(rev)
445 bothparentsactive = all(p in subset for p in parentrevs)
445 bothparentsactive = all(p in subset for p in parentrevs)
446
446
447 # set up or propagate tracking pointers if
447 # set up or propagate tracking pointers if
448 # - one of the parents is not active,
448 # - one of the parents is not active,
449 # - or descendants' parents are unresolved.
449 # - or descendants' parents are unresolved.
450 if not bothparentsactive or unresolved or resolved:
450 if not bothparentsactive or unresolved or resolved:
451 if len(parentrevs) <= 1:
451 if len(parentrevs) <= 1:
452 # can avoid copying the tracking pointer
452 # can avoid copying the tracking pointer
453 parentpointers = [(unresolved, resolved)]
453 parentpointers = [(unresolved, resolved)]
454 else:
454 else:
455 parentpointers = [
455 parentpointers = [
456 (unresolved, resolved),
456 (unresolved, resolved),
457 (unresolved.copy(), resolved.copy()),
457 (unresolved.copy(), resolved.copy()),
458 ]
458 ]
459 # 'rev' is a merge revision. increment the pending count
459 # 'rev' is a merge revision. increment the pending count
460 # as the 'unresolved' dict will be duplicated, and append
460 # as the 'unresolved' dict will be duplicated, and append
461 # p1/p2 code to the existing chains.
461 # p1/p2 code to the existing chains.
462 for r in unresolved:
462 for r in unresolved:
463 pendingcnt[r] += 1
463 pendingcnt[r] += 1
464 parentpointers[0][0][r] += b'1'
464 parentpointers[0][0][r] += b'1'
465 parentpointers[1][0][r] += b'2'
465 parentpointers[1][0][r] += b'2'
466 for i, p in enumerate(parentrevs):
466 for i, p in enumerate(parentrevs):
467 assert p < rev
467 assert p < rev
468 heapq.heappush(tovisit, -p)
468 heapq.heappush(tovisit, -p)
469 if p in pointers:
469 if p in pointers:
470 # 'p' is a fork revision. concatenate tracking pointers
470 # 'p' is a fork revision. concatenate tracking pointers
471 # and decrement the pending count accordingly.
471 # and decrement the pending count accordingly.
472 knownunresolved, knownresolved = pointers[p]
472 knownunresolved, knownresolved = pointers[p]
473 unresolved, resolved = parentpointers[i]
473 unresolved, resolved = parentpointers[i]
474 for r, c in unresolved.items():
474 for r, c in unresolved.items():
475 if r in knownunresolved:
475 if r in knownunresolved:
476 # unresolved at both paths
476 # unresolved at both paths
477 pendingcnt[r] -= 1
477 pendingcnt[r] -= 1
478 assert pendingcnt[r] > 0
478 assert pendingcnt[r] > 0
479 # take shorter chain
479 # take shorter chain
480 knownunresolved[r] = min(c, knownunresolved[r])
480 knownunresolved[r] = min(c, knownunresolved[r])
481 else:
481 else:
482 knownunresolved[r] = c
482 knownunresolved[r] = c
483 # simply propagate the 'resolved' set as deduplicating
483 # simply propagate the 'resolved' set as deduplicating
484 # 'unresolved' here would be slightly complicated.
484 # 'unresolved' here would be slightly complicated.
485 knownresolved.update(resolved)
485 knownresolved.update(resolved)
486 else:
486 else:
487 pointers[p] = parentpointers[i]
487 pointers[p] = parentpointers[i]
488
488
489 # then, populate the active parents directly and add the current
489 # then, populate the active parents directly and add the current
490 # 'rev' to the tracking pointers of the inactive parents.
490 # 'rev' to the tracking pointers of the inactive parents.
491 # 'pointers[p]' may be optimized out if both parents are active.
491 # 'pointers[p]' may be optimized out if both parents are active.
492 chaincodes = [b''] if len(parentrevs) <= 1 else [b'1', b'2']
492 chaincodes = [b''] if len(parentrevs) <= 1 else [b'1', b'2']
493 if curactive and bothparentsactive:
493 if curactive and bothparentsactive:
494 for i, p in enumerate(parentrevs):
494 for i, p in enumerate(parentrevs):
495 c = chaincodes[i]
495 c = chaincodes[i]
496 parents[rev].append((c, p))
496 parents[rev].append((c, p))
497 # no need to mark 'rev' as resolved since the 'rev' should
497 # no need to mark 'rev' as resolved since the 'rev' should
498 # be fully resolved (i.e. pendingcnt[rev] == 0)
498 # be fully resolved (i.e. pendingcnt[rev] == 0)
499 assert pendingcnt[rev] == 0
499 assert pendingcnt[rev] == 0
500 elif curactive:
500 elif curactive:
501 for i, p in enumerate(parentrevs):
501 for i, p in enumerate(parentrevs):
502 unresolved, resolved = pointers[p]
502 unresolved, resolved = pointers[p]
503 assert rev not in unresolved
503 assert rev not in unresolved
504 c = chaincodes[i]
504 c = chaincodes[i]
505 if p in subset:
505 if p in subset:
506 parents[rev].append((c, p))
506 parents[rev].append((c, p))
507 # mark 'rev' as resolved through this path
507 # mark 'rev' as resolved through this path
508 resolved.add(rev)
508 resolved.add(rev)
509 else:
509 else:
510 pendingcnt[rev] += 1
510 pendingcnt[rev] += 1
511 unresolved[rev] = c
511 unresolved[rev] = c
512 assert 0 < pendingcnt[rev] <= 2
512 assert 0 < pendingcnt[rev] <= 2
513
513
514
514
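A minimal, hypothetical sketch of the chain-code bookkeeping used above (toy values, not part of the source): at a merge the tracking pointer is duplicated and b'1'/b'2' is appended per parent, and when the two paths meet again at a fork the min() of the chains, as in the code above, is kept.

unresolved_p1 = {10: b''}          # 10 stands for a pending descendant rev
unresolved_p2 = {10: b''}
unresolved_p1[10] += b'1'          # path through the first parent
unresolved_p2[10] += b'2'          # path through the second parent
# later, at a fork revision reached through both paths, keep one chain:
kept = min(unresolved_p1[10], unresolved_p2[10])
assert kept == b'1'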
515 def _reachablerootspure(pfunc, minroot, roots, heads, includepath):
515 def _reachablerootspure(pfunc, minroot, roots, heads, includepath):
516 """See revlog.reachableroots"""
516 """See revlog.reachableroots"""
517 if not roots:
517 if not roots:
518 return []
518 return []
519 roots = set(roots)
519 roots = set(roots)
520 visit = list(heads)
520 visit = list(heads)
521 reachable = set()
521 reachable = set()
522 seen = {}
522 seen = {}
523 # prefetch all the things! (because python is slow)
523 # prefetch all the things! (because python is slow)
524 reached = reachable.add
524 reached = reachable.add
525 dovisit = visit.append
525 dovisit = visit.append
526 nextvisit = visit.pop
526 nextvisit = visit.pop
527 # open-code the post-order traversal due to the tiny size of
527 # open-code the post-order traversal due to the tiny size of
528 # sys.getrecursionlimit()
528 # sys.getrecursionlimit()
529 while visit:
529 while visit:
530 rev = nextvisit()
530 rev = nextvisit()
531 if rev in roots:
531 if rev in roots:
532 reached(rev)
532 reached(rev)
533 if not includepath:
533 if not includepath:
534 continue
534 continue
535 parents = pfunc(rev)
535 parents = pfunc(rev)
536 seen[rev] = parents
536 seen[rev] = parents
537 for parent in parents:
537 for parent in parents:
538 if parent >= minroot and parent not in seen:
538 if parent >= minroot and parent not in seen:
539 dovisit(parent)
539 dovisit(parent)
540 if not reachable:
540 if not reachable:
541 return baseset()
541 return baseset()
542 if not includepath:
542 if not includepath:
543 return reachable
543 return reachable
544 for rev in sorted(seen):
544 for rev in sorted(seen):
545 for parent in seen[rev]:
545 for parent in seen[rev]:
546 if parent in reachable:
546 if parent in reachable:
547 reached(rev)
547 reached(rev)
548 return reachable
548 return reachable
549
549
550
550
551 def reachableroots(repo, roots, heads, includepath=False):
551 def reachableroots(repo, roots, heads, includepath=False):
552 """See revlog.reachableroots"""
552 """See revlog.reachableroots"""
553 if not roots:
553 if not roots:
554 return baseset()
554 return baseset()
555 minroot = roots.min()
555 minroot = roots.min()
556 roots = list(roots)
556 roots = list(roots)
557 heads = list(heads)
557 heads = list(heads)
558 revs = repo.changelog.reachableroots(minroot, heads, roots, includepath)
558 revs = repo.changelog.reachableroots(minroot, heads, roots, includepath)
559 revs = baseset(revs)
559 revs = baseset(revs)
560 revs.sort()
560 revs.sort()
561 return revs
561 return revs
562
562
563
563
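As a rough illustration of the semantics above, here is a toy run of the pure-Python fallback, assuming these helpers can be imported from mercurial.dagop and using a plain dict in place of the changelog's parent function:

from mercurial.dagop import _reachablerootspure

parentmap = {0: (-1, -1), 1: (0, -1), 2: (1, -1), 3: (1, -1)}  # rev -> (p1, p2)
pfunc = parentmap.__getitem__
# roots {1} that are ancestors of head 3, without and with the connecting path
assert _reachablerootspure(pfunc, 1, {1}, [3], False) == {1}
assert _reachablerootspure(pfunc, 1, {1}, [3], True) == {1, 3}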
564 def _changesrange(fctx1, fctx2, linerange2, diffopts):
564 def _changesrange(fctx1, fctx2, linerange2, diffopts):
565 """Return `(diffinrange, linerange1)` where `diffinrange` is True
565 """Return `(diffinrange, linerange1)` where `diffinrange` is True
566 if diff from fctx2 to fctx1 has changes in linerange2 and
566 if diff from fctx2 to fctx1 has changes in linerange2 and
567 `linerange1` is the new line range for fctx1.
567 `linerange1` is the new line range for fctx1.
568 """
568 """
569 blocks = mdiff.allblocks(fctx1.data(), fctx2.data(), diffopts)
569 blocks = mdiff.allblocks(fctx1.data(), fctx2.data(), diffopts)
570 filteredblocks, linerange1 = mdiff.blocksinrange(blocks, linerange2)
570 filteredblocks, linerange1 = mdiff.blocksinrange(blocks, linerange2)
571 diffinrange = any(stype == b'!' for _, stype in filteredblocks)
571 diffinrange = any(stype == b'!' for _, stype in filteredblocks)
572 return diffinrange, linerange1
572 return diffinrange, linerange1
573
573
574
574
575 def blockancestors(fctx, fromline, toline, followfirst=False):
575 def blockancestors(fctx, fromline, toline, followfirst=False):
576 """Yield ancestors of `fctx` with respect to the block of lines within
576 """Yield ancestors of `fctx` with respect to the block of lines within
577 `fromline`-`toline` range.
577 `fromline`-`toline` range.
578 """
578 """
579 diffopts = patch.diffopts(fctx._repo.ui)
579 diffopts = patch.diffopts(fctx._repo.ui)
580 fctx = fctx.introfilectx()
580 fctx = fctx.introfilectx()
581 visit = {(fctx.linkrev(), fctx.filenode()): (fctx, (fromline, toline))}
581 visit = {(fctx.linkrev(), fctx.filenode()): (fctx, (fromline, toline))}
582 while visit:
582 while visit:
583 c, linerange2 = visit.pop(max(visit))
583 c, linerange2 = visit.pop(max(visit))
584 pl = c.parents()
584 pl = c.parents()
585 if followfirst:
585 if followfirst:
586 pl = pl[:1]
586 pl = pl[:1]
587 if not pl:
587 if not pl:
588 # The block originates from the initial revision.
588 # The block originates from the initial revision.
589 yield c, linerange2
589 yield c, linerange2
590 continue
590 continue
591 inrange = False
591 inrange = False
592 for p in pl:
592 for p in pl:
593 inrangep, linerange1 = _changesrange(p, c, linerange2, diffopts)
593 inrangep, linerange1 = _changesrange(p, c, linerange2, diffopts)
594 inrange = inrange or inrangep
594 inrange = inrange or inrangep
595 if linerange1[0] == linerange1[1]:
595 if linerange1[0] == linerange1[1]:
596 # Parent's linerange is empty, meaning that the block got
596 # Parent's linerange is empty, meaning that the block got
597 # introduced in this revision; no need to go further in this
597 # introduced in this revision; no need to go further in this
598 # branch.
598 # branch.
599 continue
599 continue
600 # Set _descendantrev with 'c' (a known descendant) so that, when
600 # Set _descendantrev with 'c' (a known descendant) so that, when
601 # _adjustlinkrev is called for 'p', it receives this descendant
601 # _adjustlinkrev is called for 'p', it receives this descendant
602 # (as srcrev) instead of the possibly topmost introrev.
602 # (as srcrev) instead of the possibly topmost introrev.
603 p._descendantrev = c.rev()
603 p._descendantrev = c.rev()
604 visit[p.linkrev(), p.filenode()] = p, linerange1
604 visit[p.linkrev(), p.filenode()] = p, linerange1
605 if inrange:
605 if inrange:
606 yield c, linerange2
606 yield c, linerange2
607
607
608
608
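A small aside on the traversal order above (illustrative values only): `visit` is keyed by `(linkrev, filenode)`, so `visit.pop(max(visit))` always processes the candidate with the highest linkrev first, i.e. ancestors are handled newest-first.

visit = {(5, b'node-a'): 'ctx-a', (7, b'node-b'): 'ctx-b'}
assert visit.pop(max(visit)) == 'ctx-b'   # highest linkrev handled first
assert visit.pop(max(visit)) == 'ctx-a'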
609 def blockdescendants(fctx, fromline, toline):
609 def blockdescendants(fctx, fromline, toline):
610 """Yield descendants of `fctx` with respect to the block of lines within
610 """Yield descendants of `fctx` with respect to the block of lines within
611 `fromline`-`toline` range.
611 `fromline`-`toline` range.
612 """
612 """
613 # First possibly yield 'fctx' if it has changes in range with respect to
613 # First possibly yield 'fctx' if it has changes in range with respect to
614 # its parents.
614 # its parents.
615 try:
615 try:
616 c, linerange1 = next(blockancestors(fctx, fromline, toline))
616 c, linerange1 = next(blockancestors(fctx, fromline, toline))
617 except StopIteration:
617 except StopIteration:
618 pass
618 pass
619 else:
619 else:
620 if c == fctx:
620 if c == fctx:
621 yield c, linerange1
621 yield c, linerange1
622
622
623 diffopts = patch.diffopts(fctx._repo.ui)
623 diffopts = patch.diffopts(fctx._repo.ui)
624 fl = fctx.filelog()
624 fl = fctx.filelog()
625 seen = {fctx.filerev(): (fctx, (fromline, toline))}
625 seen = {fctx.filerev(): (fctx, (fromline, toline))}
626 for i in fl.descendants([fctx.filerev()]):
626 for i in fl.descendants([fctx.filerev()]):
627 c = fctx.filectx(i)
627 c = fctx.filectx(i)
628 inrange = False
628 inrange = False
629 for x in fl.parentrevs(i):
629 for x in fl.parentrevs(i):
630 try:
630 try:
631 p, linerange2 = seen[x]
631 p, linerange2 = seen[x]
632 except KeyError:
632 except KeyError:
633 # nullrev or other branch
633 # nullrev or other branch
634 continue
634 continue
635 inrangep, linerange1 = _changesrange(c, p, linerange2, diffopts)
635 inrangep, linerange1 = _changesrange(c, p, linerange2, diffopts)
636 inrange = inrange or inrangep
636 inrange = inrange or inrangep
637 # If revision 'i' has been seen (it's a merge) and the line range
637 # If revision 'i' has been seen (it's a merge) and the line range
638 # previously computed differs from the one we just got, we take the
638 # previously computed differs from the one we just got, we take the
639 # surrounding interval. This is conservative but avoids losing
639 # surrounding interval. This is conservative but avoids losing
640 # information.
640 # information.
641 if i in seen and seen[i][1] != linerange1:
641 if i in seen and seen[i][1] != linerange1:
642 lbs, ubs = zip(linerange1, seen[i][1])
642 lbs, ubs = zip(linerange1, seen[i][1])
643 linerange1 = min(lbs), max(ubs)
643 linerange1 = min(lbs), max(ubs)
644 seen[i] = c, linerange1
644 seen[i] = c, linerange1
645 if inrange:
645 if inrange:
646 yield c, linerange1
646 yield c, linerange1
647
647
648
648
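The merge handling above widens the tracked line range to the smallest interval covering both computed ranges, for example (toy values):

linerange1 = (3, 7)   # range computed through one parent
previous = (5, 9)     # range previously recorded for the merge revision
lbs, ubs = zip(linerange1, previous)
assert (min(lbs), max(ubs)) == (3, 9)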
649 @attr.s(slots=True, frozen=True)
649 @attr.s(slots=True, frozen=True)
650 class annotateline:
650 class annotateline:
651 fctx = attr.ib()
651 fctx = attr.ib()
652 lineno = attr.ib()
652 lineno = attr.ib()
653 # Whether this annotation was the result of a skip-annotate.
653 # Whether this annotation was the result of a skip-annotate.
654 skip = attr.ib(default=False)
654 skip = attr.ib(default=False)
655 text = attr.ib(default=None)
655 text = attr.ib(default=None)
656
656
657
657
658 @attr.s(slots=True, frozen=True)
658 @attr.s(slots=True, frozen=True)
659 class _annotatedfile:
659 class _annotatedfile:
660 # list indexed by lineno - 1
660 # list indexed by lineno - 1
661 fctxs = attr.ib()
661 fctxs = attr.ib()
662 linenos = attr.ib()
662 linenos = attr.ib()
663 skips = attr.ib()
663 skips = attr.ib()
664 # full file content
664 # full file content
665 text = attr.ib()
665 text = attr.ib()
666
666
667
667
668 def _countlines(text):
668 def _countlines(text):
669 if text.endswith(b"\n"):
669 if text.endswith(b"\n"):
670 return text.count(b"\n")
670 return text.count(b"\n")
671 return text.count(b"\n") + int(bool(text))
671 return text.count(b"\n") + int(bool(text))
672
672
673
673
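Quick sanity checks of the helper above (assuming _countlines is in scope):

assert _countlines(b"a\nb\n") == 2   # trailing newline: just count b"\n"
assert _countlines(b"a\nb") == 2     # no trailing newline: one extra line
assert _countlines(b"") == 0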
674 def _decoratelines(text, fctx):
674 def _decoratelines(text, fctx):
675 n = _countlines(text)
675 n = _countlines(text)
676 linenos = pycompat.rangelist(1, n + 1)
676 linenos = pycompat.rangelist(1, n + 1)
677 return _annotatedfile([fctx] * n, linenos, [False] * n, text)
677 return _annotatedfile([fctx] * n, linenos, [False] * n, text)
678
678
679
679
680 def _annotatepair(parents, childfctx, child, skipchild, diffopts):
680 def _annotatepair(parents, childfctx, child, skipchild, diffopts):
681 r"""
681 r"""
682 Given parent and child fctxes and annotate data for parents, for all lines
682 Given parent and child fctxes and annotate data for parents, for all lines
683 in either parent that match the child, annotate the child with the parent's
683 in either parent that match the child, annotate the child with the parent's
684 data.
684 data.
685
685
686 Additionally, if `skipchild` is True, replace all other lines with parent
686 Additionally, if `skipchild` is True, replace all other lines with parent
687 annotate data as well such that child is never blamed for any lines.
687 annotate data as well such that child is never blamed for any lines.
688
688
689 See test-annotate.py for unit tests.
689 See test-annotate.py for unit tests.
690 """
690 """
691 pblocks = [
691 pblocks = [
692 (parent, mdiff.allblocks(parent.text, child.text, opts=diffopts))
692 (parent, mdiff.allblocks(parent.text, child.text, opts=diffopts))
693 for parent in parents
693 for parent in parents
694 ]
694 ]
695
695
696 if skipchild:
696 if skipchild:
697 # Need to iterate over the blocks twice -- make it a list
697 # Need to iterate over the blocks twice -- make it a list
698 pblocks = [(p, list(blocks)) for (p, blocks) in pblocks]
698 pblocks = [(p, list(blocks)) for (p, blocks) in pblocks]
699 # Mercurial currently prefers p2 over p1 for annotate.
699 # Mercurial currently prefers p2 over p1 for annotate.
700 # TODO: change this?
700 # TODO: change this?
701 for parent, blocks in pblocks:
701 for parent, blocks in pblocks:
702 for (a1, a2, b1, b2), t in blocks:
702 for (a1, a2, b1, b2), t in blocks:
703 # Changed blocks ('!') or blocks made only of blank lines ('~')
703 # Changed blocks ('!') or blocks made only of blank lines ('~')
704 # belong to the child.
704 # belong to the child.
705 if t == b'=':
705 if t == b'=':
706 child.fctxs[b1:b2] = parent.fctxs[a1:a2]
706 child.fctxs[b1:b2] = parent.fctxs[a1:a2]
707 child.linenos[b1:b2] = parent.linenos[a1:a2]
707 child.linenos[b1:b2] = parent.linenos[a1:a2]
708 child.skips[b1:b2] = parent.skips[a1:a2]
708 child.skips[b1:b2] = parent.skips[a1:a2]
709
709
710 if skipchild:
710 if skipchild:
711 # Now try to match up anything that couldn't be matched.
711 # Now try to match up anything that couldn't be matched.
712 # Reversing pblocks maintains the bias towards p2, matching the
712 # Reversing pblocks maintains the bias towards p2, matching the
713 # behavior above.
713 # behavior above.
714 pblocks.reverse()
714 pblocks.reverse()
715
715
716 # The heuristics are:
716 # The heuristics are:
717 # * Work on blocks of changed lines (effectively diff hunks with -U0).
717 # * Work on blocks of changed lines (effectively diff hunks with -U0).
718 # This could potentially be smarter but works well enough.
718 # This could potentially be smarter but works well enough.
719 # * For a non-matching section, do a best-effort fit. Match lines in
719 # * For a non-matching section, do a best-effort fit. Match lines in
720 # diff hunks 1:1, dropping lines as necessary.
720 # diff hunks 1:1, dropping lines as necessary.
721 # * Repeat the last line as a last resort.
721 # * Repeat the last line as a last resort.
722
722
723 # First, replace as much as possible without repeating the last line.
723 # First, replace as much as possible without repeating the last line.
724 remaining = [(parent, []) for parent, _blocks in pblocks]
724 remaining = [(parent, []) for parent, _blocks in pblocks]
725 for idx, (parent, blocks) in enumerate(pblocks):
725 for idx, (parent, blocks) in enumerate(pblocks):
726 for (a1, a2, b1, b2), _t in blocks:
726 for (a1, a2, b1, b2), _t in blocks:
727 if a2 - a1 >= b2 - b1:
727 if a2 - a1 >= b2 - b1:
728 for bk in range(b1, b2):
728 for bk in range(b1, b2):
729 if child.fctxs[bk] == childfctx:
729 if child.fctxs[bk] == childfctx:
730 ak = min(a1 + (bk - b1), a2 - 1)
730 ak = min(a1 + (bk - b1), a2 - 1)
731 child.fctxs[bk] = parent.fctxs[ak]
731 child.fctxs[bk] = parent.fctxs[ak]
732 child.linenos[bk] = parent.linenos[ak]
732 child.linenos[bk] = parent.linenos[ak]
733 child.skips[bk] = True
733 child.skips[bk] = True
734 else:
734 else:
735 remaining[idx][1].append((a1, a2, b1, b2))
735 remaining[idx][1].append((a1, a2, b1, b2))
736
736
737 # Then, look at anything left, which might involve repeating the last
737 # Then, look at anything left, which might involve repeating the last
738 # line.
738 # line.
739 for parent, blocks in remaining:
739 for parent, blocks in remaining:
740 for a1, a2, b1, b2 in blocks:
740 for a1, a2, b1, b2 in blocks:
741 for bk in range(b1, b2):
741 for bk in range(b1, b2):
742 if child.fctxs[bk] == childfctx:
742 if child.fctxs[bk] == childfctx:
743 ak = min(a1 + (bk - b1), a2 - 1)
743 ak = min(a1 + (bk - b1), a2 - 1)
744 child.fctxs[bk] = parent.fctxs[ak]
744 child.fctxs[bk] = parent.fctxs[ak]
745 child.linenos[bk] = parent.linenos[ak]
745 child.linenos[bk] = parent.linenos[ak]
746 child.skips[bk] = True
746 child.skips[bk] = True
747 return child
747 return child
748
748
749
749
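The core move in _annotatepair is the slice copy for '=' blocks: child lines b1:b2 that are identical to parent lines a1:a2 inherit the parent's annotation data. A toy illustration with plain lists standing in for the per-line fctx arrays:

child_fctxs = ['child'] * 5
parent_fctxs = ['parent'] * 5
a1, a2, b1, b2 = 1, 4, 0, 3   # hypothetical '=' block from the diff
child_fctxs[b1:b2] = parent_fctxs[a1:a2]
assert child_fctxs == ['parent', 'parent', 'parent', 'child', 'child']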
750 def annotate(base, parents, skiprevs=None, diffopts=None):
750 def annotate(base, parents, skiprevs=None, diffopts=None):
751 """Core algorithm for filectx.annotate()
751 """Core algorithm for filectx.annotate()
752
752
753 `parents(fctx)` is a function returning a list of parent filectxs.
753 `parents(fctx)` is a function returning a list of parent filectxs.
754 """
754 """
755
755
756 # This algorithm would prefer to be recursive, but Python is a
756 # This algorithm would prefer to be recursive, but Python is a
757 # bit recursion-hostile. Instead we do an iterative
757 # bit recursion-hostile. Instead we do an iterative
758 # depth-first search.
758 # depth-first search.
759
759
760 # 1st DFS pre-calculates pcache and needed
760 # 1st DFS pre-calculates pcache and needed
761 visit = [base]
761 visit = [base]
762 pcache = {}
762 pcache = {}
763 needed = {base: 1}
763 needed = {base: 1}
764 while visit:
764 while visit:
765 f = visit.pop()
765 f = visit.pop()
766 if f in pcache:
766 if f in pcache:
767 continue
767 continue
768 pl = parents(f)
768 pl = parents(f)
769 pcache[f] = pl
769 pcache[f] = pl
770 for p in pl:
770 for p in pl:
771 needed[p] = needed.get(p, 0) + 1
771 needed[p] = needed.get(p, 0) + 1
772 if p not in pcache:
772 if p not in pcache:
773 visit.append(p)
773 visit.append(p)
774
774
775 # 2nd DFS does the actual annotate
775 # 2nd DFS does the actual annotate
776 visit[:] = [base]
776 visit[:] = [base]
777 hist = {}
777 hist = {}
778 while visit:
778 while visit:
779 f = visit[-1]
779 f = visit[-1]
780 if f in hist:
780 if f in hist:
781 visit.pop()
781 visit.pop()
782 continue
782 continue
783
783
784 ready = True
784 ready = True
785 pl = pcache[f]
785 pl = pcache[f]
786 for p in pl:
786 for p in pl:
787 if p not in hist:
787 if p not in hist:
788 ready = False
788 ready = False
789 visit.append(p)
789 visit.append(p)
790 if ready:
790 if ready:
791 visit.pop()
791 visit.pop()
792 curr = _decoratelines(f.data(), f)
792 curr = _decoratelines(f.data(), f)
793 skipchild = False
793 skipchild = False
794 if skiprevs is not None:
794 if skiprevs is not None:
795 skipchild = f._changeid in skiprevs
795 skipchild = f._changeid in skiprevs
796 curr = _annotatepair(
796 curr = _annotatepair(
797 [hist[p] for p in pl], f, curr, skipchild, diffopts
797 [hist[p] for p in pl], f, curr, skipchild, diffopts
798 )
798 )
799 for p in pl:
799 for p in pl:
800 if needed[p] == 1:
800 if needed[p] == 1:
801 del hist[p]
801 del hist[p]
802 del needed[p]
802 del needed[p]
803 else:
803 else:
804 needed[p] -= 1
804 needed[p] -= 1
805
805
806 hist[f] = curr
806 hist[f] = curr
807 del pcache[f]
807 del pcache[f]
808
808
809 a = hist[base]
809 a = hist[base]
810 return [
810 return [
811 annotateline(*r)
811 annotateline(*r)
812 for r in zip(a.fctxs, a.linenos, a.skips, mdiff.splitnewlines(a.text))
812 for r in zip(a.fctxs, a.linenos, a.skips, mdiff.splitnewlines(a.text))
813 ]
813 ]
814
814
815
815
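The `needed` dict above is a reference count: a parent's annotation is dropped from `hist` as soon as its last child has consumed it, so memory stays proportional to the working set rather than the whole history. A stripped-down sketch of that pattern (toy names, not Mercurial API):

needed = {'parent': 2}               # two children still need this annotation
hist = {'parent': 'annotated data'}
for child in ('child1', 'child2'):
    # ... the child merges hist['parent'] into its own annotation here ...
    if needed['parent'] == 1:
        del hist['parent']
        del needed['parent']
    else:
        needed['parent'] -= 1
assert 'parent' not in hist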
816 def toposort(revs, parentsfunc, firstbranch=()):
816 def toposort(revs, parentsfunc, firstbranch=()):
817 """Yield revisions from heads to roots one (topo) branch at a time.
817 """Yield revisions from heads to roots one (topo) branch at a time.
818
818
819 This function aims to be used by a graph generator that wishes to minimize
819 This function aims to be used by a graph generator that wishes to minimize
820 the number of parallel branches and their interleaving.
820 the number of parallel branches and their interleaving.
821
821
822 Example iteration order (numbers show the "true" order in a changelog):
822 Example iteration order (numbers show the "true" order in a changelog):
823
823
824 o 4
824 o 4
825 |
825 |
826 o 1
826 o 1
827 |
827 |
828 | o 3
828 | o 3
829 | |
829 | |
830 | o 2
830 | o 2
831 |/
831 |/
832 o 0
832 o 0
833
833
834 Note that the ancestors of merges are understood by the current
834 Note that the ancestors of merges are understood by the current
835 algorithm to be on the same branch. This means no reordering will
835 algorithm to be on the same branch. This means no reordering will
836 occur behind a merge.
836 occur behind a merge.
837 """
837 """
838
838
839 ### Quick summary of the algorithm
839 ### Quick summary of the algorithm
840 #
840 #
841 # This function is based around a "retention" principle. We keep revisions
841 # This function is based around a "retention" principle. We keep revisions
842 # in memory until we are ready to emit a whole branch that immediately
842 # in memory until we are ready to emit a whole branch that immediately
843 # "merges" into an existing one. This reduces the number of parallel
843 # "merges" into an existing one. This reduces the number of parallel
844 # branches with interleaved revisions.
844 # branches with interleaved revisions.
845 #
845 #
846 # During iteration revs are split into two groups:
846 # During iteration revs are split into two groups:
847 # A) revisions already emitted
847 # A) revisions already emitted
848 # B) revisions in "retention". They are stored as different subgroups.
848 # B) revisions in "retention". They are stored as different subgroups.
849 #
849 #
850 # for each REV, we do the following logic:
850 # for each REV, we do the following logic:
851 #
851 #
852 # 1) if REV is a parent of (A), we will emit it. If there is a
852 # 1) if REV is a parent of (A), we will emit it. If there is a
853 # retention group ((B) above) that is blocked on REV being
853 # retention group ((B) above) that is blocked on REV being
854 # available, we emit all the revisions out of that retention
854 # available, we emit all the revisions out of that retention
855 # group first.
855 # group first.
856 #
856 #
857 # 2) else, we'll search for a subgroup in (B) waiting for REV to be
857 # 2) else, we'll search for a subgroup in (B) waiting for REV to be
858 # available. If such a subgroup exists, we add REV to it and the subgroup
858 # available. If such a subgroup exists, we add REV to it and the subgroup
859 # is now waiting for REV.parents() to be available.
859 # is now waiting for REV.parents() to be available.
860 #
860 #
861 # 3) finally if no such group existed in (B), we create a new subgroup.
861 # 3) finally if no such group existed in (B), we create a new subgroup.
862 #
862 #
863 #
863 #
864 # To bootstrap the algorithm, we emit the tipmost revision (which
864 # To bootstrap the algorithm, we emit the tipmost revision (which
865 # puts it in group (A) from above).
865 # puts it in group (A) from above).
866
866
867 revs.sort(reverse=True)
867 revs.sort(reverse=True)
868
868
869 # Set of parents of revisions that have been emitted. They can be considered
869 # Set of parents of revisions that have been emitted. They can be considered
870 # unblocked as the graph generator is already aware of them so there is no
870 # unblocked as the graph generator is already aware of them so there is no
871 # need to delay the revisions that reference them.
871 # need to delay the revisions that reference them.
872 #
872 #
873 # If someone wants to prioritize a branch over the others, pre-filling this
873 # If someone wants to prioritize a branch over the others, pre-filling this
874 # set will force all other branches to wait until this branch is ready to be
874 # set will force all other branches to wait until this branch is ready to be
875 # emitted.
875 # emitted.
876 unblocked = set(firstbranch)
876 unblocked = set(firstbranch)
877
877
878 # list of groups waiting to be displayed, each group is defined by:
878 # list of groups waiting to be displayed, each group is defined by:
879 #
879 #
880 # (revs: lists of revs waiting to be displayed,
880 # (revs: lists of revs waiting to be displayed,
881 # blocked: set of revs that cannot be displayed before those in 'revs')
881 # blocked: set of revs that cannot be displayed before those in 'revs')
882 #
882 #
883 # The second value ('blocked') corresponds to parents of any revision in the
883 # The second value ('blocked') corresponds to parents of any revision in the
884 # group ('revs') that is not itself contained in the group. The main idea
884 # group ('revs') that is not itself contained in the group. The main idea
885 # of this algorithm is to delay as much as possible the emission of any
885 # of this algorithm is to delay as much as possible the emission of any
886 # revision. This means waiting for the moment we are about to display
886 # revision. This means waiting for the moment we are about to display
887 # these parents to display the revs in a group.
887 # these parents to display the revs in a group.
888 #
888 #
889 # This first implementation is smart until it encounters a merge: it will
889 # This first implementation is smart until it encounters a merge: it will
890 # emit revs as soon as any parent is about to be emitted and can grow an
890 # emit revs as soon as any parent is about to be emitted and can grow an
891 # arbitrary number of revs in 'blocked'. In practice this means we properly
891 # arbitrary number of revs in 'blocked'. In practice this means we properly
892 # retain new branches but give up on any special ordering for ancestors
892 # retain new branches but give up on any special ordering for ancestors
893 # of merges. The implementation can be improved to handle this better.
893 # of merges. The implementation can be improved to handle this better.
894 #
894 #
895 # The first subgroup is special. It corresponds to all the revisions that
895 # The first subgroup is special. It corresponds to all the revisions that
896 # were already emitted. The 'revs' list is expected to be empty and the
896 # were already emitted. The 'revs' list is expected to be empty and the
897 # 'blocked' set contains the parent revisions of already emitted revisions.
897 # 'blocked' set contains the parent revisions of already emitted revisions.
898 #
898 #
899 # You could pre-seed the <parents> set of groups[0] with specific
899 # You could pre-seed the <parents> set of groups[0] with specific
900 # changesets to select what the first emitted branch should be.
900 # changesets to select what the first emitted branch should be.
901 groups = [([], unblocked)]
901 groups = [([], unblocked)]
902 pendingheap = []
902 pendingheap = []
903 pendingset = set()
903 pendingset = set()
904
904
905 heapq.heapify(pendingheap)
905 heapq.heapify(pendingheap)
906 heappop = heapq.heappop
906 heappop = heapq.heappop
907 heappush = heapq.heappush
907 heappush = heapq.heappush
908 for currentrev in revs:
908 for currentrev in revs:
909 # The heap works with the smallest element; we want the highest, so we invert
909 # The heap works with the smallest element; we want the highest, so we invert
910 if currentrev not in pendingset:
910 if currentrev not in pendingset:
911 heappush(pendingheap, -currentrev)
911 heappush(pendingheap, -currentrev)
912 pendingset.add(currentrev)
912 pendingset.add(currentrev)
913 # iterate on pending revs until the current rev has been
913 # iterate on pending revs until the current rev has been
914 # processed.
914 # processed.
915 rev = None
915 rev = None
916 while rev != currentrev:
916 while rev != currentrev:
917 rev = -heappop(pendingheap)
917 rev = -heappop(pendingheap)
918 pendingset.remove(rev)
918 pendingset.remove(rev)
919
919
920 # Seek for a subgroup blocked, waiting for the current revision.
920 # Seek for a subgroup blocked, waiting for the current revision.
921 matching = [i for i, g in enumerate(groups) if rev in g[1]]
921 matching = [i for i, g in enumerate(groups) if rev in g[1]]
922
922
923 if matching:
923 if matching:
924 # The main idea is to gather together all sets that are blocked
924 # The main idea is to gather together all sets that are blocked
925 # on the same revision.
925 # on the same revision.
926 #
926 #
927 # Groups are merged when a common blocking ancestor is
927 # Groups are merged when a common blocking ancestor is
928 # observed. For example, given two groups:
928 # observed. For example, given two groups:
929 #
929 #
930 # revs [5, 4] waiting for 1
930 # revs [5, 4] waiting for 1
931 # revs [3, 2] waiting for 1
931 # revs [3, 2] waiting for 1
932 #
932 #
933 # These two groups will be merged when we process
933 # These two groups will be merged when we process
934 # 1. In theory, we could have merged the groups when
934 # 1. In theory, we could have merged the groups when
935 # we added 2 to the group it is now in (we could have
935 # we added 2 to the group it is now in (we could have
936 # noticed the groups were both blocked on 1 then), but
936 # noticed the groups were both blocked on 1 then), but
937 # the way it works now makes the algorithm simpler.
937 # the way it works now makes the algorithm simpler.
938 #
938 #
939 # We also always keep the oldest subgroup first. We can
939 # We also always keep the oldest subgroup first. We can
940 # probably improve the behavior by having the longest set
940 # probably improve the behavior by having the longest set
941 # first. That way, graph algorithms could minimise the length
941 # first. That way, graph algorithms could minimise the length
942 # of parallel lines in their drawing. This is currently not done.
942 # of parallel lines in their drawing. This is currently not done.
943 targetidx = matching.pop(0)
943 targetidx = matching.pop(0)
944 trevs, tparents = groups[targetidx]
944 trevs, tparents = groups[targetidx]
945 for i in matching:
945 for i in matching:
946 gr = groups[i]
946 gr = groups[i]
947 trevs.extend(gr[0])
947 trevs.extend(gr[0])
948 tparents |= gr[1]
948 tparents |= gr[1]
949 # delete all merged subgroups (except the one we kept)
949 # delete all merged subgroups (except the one we kept)
950 # (starting from the last subgroup for performance and
950 # (starting from the last subgroup for performance and
951 # sanity reasons)
951 # sanity reasons)
952 for i in reversed(matching):
952 for i in reversed(matching):
953 del groups[i]
953 del groups[i]
954 else:
954 else:
955 # This is a new head. We create a new subgroup for it.
955 # This is a new head. We create a new subgroup for it.
956 targetidx = len(groups)
956 targetidx = len(groups)
957 groups.append(([], {rev}))
957 groups.append(([], {rev}))
958
958
959 gr = groups[targetidx]
959 gr = groups[targetidx]
960
960
961 # We now add the current nodes to this subgroup. This is done
961 # We now add the current nodes to this subgroup. This is done
962 # after the subgroup merging because all elements from a subgroup
962 # after the subgroup merging because all elements from a subgroup
963 # that relied on this rev must precede it.
963 # that relied on this rev must precede it.
964 #
964 #
965 # we also update the <parents> set to include the parents of the
965 # we also update the <parents> set to include the parents of the
966 # new nodes.
966 # new nodes.
967 if rev == currentrev: # only display stuff in rev
967 if rev == currentrev: # only display stuff in rev
968 gr[0].append(rev)
968 gr[0].append(rev)
969 gr[1].remove(rev)
969 gr[1].remove(rev)
970 parents = [p for p in parentsfunc(rev) if p > nullrev]
970 parents = [p for p in parentsfunc(rev) if p > nullrev]
971 gr[1].update(parents)
971 gr[1].update(parents)
972 for p in parents:
972 for p in parents:
973 if p not in pendingset:
973 if p not in pendingset:
974 pendingset.add(p)
974 pendingset.add(p)
975 heappush(pendingheap, -p)
975 heappush(pendingheap, -p)
976
976
977 # Look for a subgroup to display
977 # Look for a subgroup to display
978 #
978 #
979 # When unblocked is empty (if clause), we were not waiting for any
979 # When unblocked is empty (if clause), we were not waiting for any
980 # revisions during the first iteration (if no priority was given) or
980 # revisions during the first iteration (if no priority was given) or
981 # if we emitted a whole disconnected set of the graph (reached a
981 # if we emitted a whole disconnected set of the graph (reached a
982 # root). In that case we arbitrarily take the oldest known
982 # root). In that case we arbitrarily take the oldest known
983 # subgroup. The heuristic could probably be better.
983 # subgroup. The heuristic could probably be better.
984 #
984 #
985 # Otherwise (elif clause) if the subgroup is blocked on
985 # Otherwise (elif clause) if the subgroup is blocked on
986 # a revision we just emitted, we can safely emit it as
986 # a revision we just emitted, we can safely emit it as
987 # well.
987 # well.
988 if not unblocked:
988 if not unblocked:
989 if len(groups) > 1: # display other subset
989 if len(groups) > 1: # display other subset
990 targetidx = 1
990 targetidx = 1
991 gr = groups[1]
991 gr = groups[1]
992 elif not gr[1] & unblocked:
992 elif not gr[1] & unblocked:
993 gr = None
993 gr = None
994
994
995 if gr is not None:
995 if gr is not None:
996 # update the set of awaited revisions with the one from the
996 # update the set of awaited revisions with the one from the
997 # subgroup
997 # subgroup
998 unblocked |= gr[1]
998 unblocked |= gr[1]
999 # output all revisions in the subgroup
999 # output all revisions in the subgroup
1000 for r in gr[0]:
1000 for r in gr[0]:
1001 yield r
1001 yield r
1002 # delete the subgroup that you just output
1002 # delete the subgroup that you just output
1003 # unless it is groups[0] in which case you just empty it.
1003 # unless it is groups[0] in which case you just empty it.
1004 if targetidx:
1004 if targetidx:
1005 del groups[targetidx]
1005 del groups[targetidx]
1006 else:
1006 else:
1007 gr[0][:] = []
1007 gr[0][:] = []
1008 # Check if we have some subgroup waiting for revisions we are not going to
1008 # Check if we have some subgroup waiting for revisions we are not going to
1009 # iterate over
1009 # iterate over
1010 for g in groups:
1010 for g in groups:
1011 for r in g[0]:
1011 for r in g[0]:
1012 yield r
1012 yield r
1013
1013
1014
1014
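A usage sketch of toposort on the DAG drawn in the docstring above (assuming the function is importable; a dict stands in for the changelog's parent function):

parentmap = {0: [], 1: [0], 2: [0], 3: [2], 4: [1]}
revs = [0, 1, 2, 3, 4]
ordered = list(toposort(revs, parentmap.__getitem__))
# one topological branch at a time, matching the picture in the docstring
assert ordered == [4, 1, 3, 2, 0]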
1015 def headrevs(revs, parentsfn):
1015 def headrevs(revs, parentsfn):
1016 """Resolve the set of heads from a set of revisions.
1016 """Resolve the set of heads from a set of revisions.
1017
1017
1018 Receives an iterable of revision numbers and a callable that receives a
1018 Receives an iterable of revision numbers and a callable that receives a
1019 revision number and returns an iterable of parent revision numbers, possibly
1019 revision number and returns an iterable of parent revision numbers, possibly
1020 including nullrev.
1020 including nullrev.
1021
1021
1022 Returns a set of revision numbers that are DAG heads within the passed
1022 Returns a set of revision numbers that are DAG heads within the passed
1023 subset.
1023 subset.
1024
1024
1025 ``nullrev`` is never included in the returned set, even if it is provided in
1025 ``nullrev`` is never included in the returned set, even if it is provided in
1026 the input set.
1026 the input set.
1027 """
1027 """
1028 headrevs = set(revs)
1028 headrevs = set(revs)
1029 parents = {nullrev}
1029 parents = {nullrev}
1030 up = parents.update
1030 up = parents.update
1031
1031
1032 for rev in revs:
1032 for rev in revs:
1033 up(parentsfn(rev))
1033 up(parentsfn(rev))
1034 headrevs.difference_update(parents)
1034 headrevs.difference_update(parents)
1035 return headrevs
1035 return headrevs
1036
1036
1037
1037
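For example, with a toy parent map (-1 standing in for nullrev):

parentmap = {0: [-1], 1: [0], 2: [0], 3: [1, 2]}
assert headrevs([0, 1, 2, 3], parentmap.__getitem__) == {3}
assert headrevs([0, 1, 2], parentmap.__getitem__) == {1, 2}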
1038 def headrevsdiff(parentsfn, start, stop):
1039 """Compute how the set of heads changed between
1040 revisions `start-1` and `stop-1`.
1041 """
1042 parents = set()
1043
1044 heads_added = set()
1045 heads_removed = set()
1046
1047 for rev in range(stop - 1, start - 1, -1):
1048 if rev in parents:
1049 parents.remove(rev)
1050 else:
1051 heads_added.add(rev)
1052 for p in parentsfn(rev):
1053 parents.add(p)
1054
1055 # now `parents` is the collection of candidate removed heads
1056 rev = start - 1
1057 while parents:
1058 if rev in parents:
1059 heads_removed.add(rev)
1060 parents.remove(rev)
1061
1062 for p in parentsfn(rev):
1063 parents.discard(p)
1064 rev = rev - 1
1065
1066 return (heads_removed, heads_added)
1067
1068
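The function above only walks the revisions in `[start, stop)` plus the older revisions needed to settle the candidate removed heads, so the head delta is typically much cheaper to compute than two full headrevs() calls when few revisions were added. A toy run (same parent-map convention as above):

parentmap = {0: [-1], 1: [0], 2: [1], 3: [1]}
# the state with revs {0, 1} (heads {1}) grows to revs {0, 1, 2, 3} (heads {2, 3})
removed, added = headrevsdiff(parentmap.__getitem__, 2, 4)
assert removed == {1}
assert added == {2, 3}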
1038 def headrevssubset(revsfn, parentrevsfn, startrev=None, stoprevs=None):
1069 def headrevssubset(revsfn, parentrevsfn, startrev=None, stoprevs=None):
1039 """Returns the set of all revs that have no children with control.
1070 """Returns the set of all revs that have no children with control.
1040
1071
1041 ``revsfn`` is a callable that with no arguments returns an iterator over
1072 ``revsfn`` is a callable that with no arguments returns an iterator over
1042 all revision numbers in topological order. With a ``start`` argument, it
1073 all revision numbers in topological order. With a ``start`` argument, it
1043 returns revision numbers starting at that number.
1074 returns revision numbers starting at that number.
1044
1075
1045 ``parentrevsfn`` is a callable receiving a revision number and returns an
1076 ``parentrevsfn`` is a callable receiving a revision number and returns an
1046 iterable of parent revision numbers, where values can include nullrev.
1077 iterable of parent revision numbers, where values can include nullrev.
1047
1078
1048 ``startrev`` is a revision number at which to start the search.
1079 ``startrev`` is a revision number at which to start the search.
1049
1080
1050 ``stoprevs`` is an iterable of revision numbers that, when encountered,
1081 ``stoprevs`` is an iterable of revision numbers that, when encountered,
1051 will stop DAG traversal beyond them. Parents of revisions in this
1082 will stop DAG traversal beyond them. Parents of revisions in this
1052 collection will be heads.
1083 collection will be heads.
1053 """
1084 """
1054 if startrev is None:
1085 if startrev is None:
1055 startrev = nullrev
1086 startrev = nullrev
1056
1087
1057 stoprevs = set(stoprevs or [])
1088 stoprevs = set(stoprevs or [])
1058
1089
1059 reachable = {startrev}
1090 reachable = {startrev}
1060 heads = {startrev}
1091 heads = {startrev}
1061
1092
1062 for rev in revsfn(start=startrev + 1):
1093 for rev in revsfn(start=startrev + 1):
1063 for prev in parentrevsfn(rev):
1094 for prev in parentrevsfn(rev):
1064 if prev in reachable:
1095 if prev in reachable:
1065 if rev not in stoprevs:
1096 if rev not in stoprevs:
1066 reachable.add(rev)
1097 reachable.add(rev)
1067 heads.add(rev)
1098 heads.add(rev)
1068
1099
1069 if prev in heads and prev not in stoprevs:
1100 if prev in heads and prev not in stoprevs:
1070 heads.remove(prev)
1101 heads.remove(prev)
1071
1102
1072 return heads
1103 return heads
1073
1104
1074
1105
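A small sketch of headrevssubset with toy callables in place of a real changelog:

parentmap = {0: [-1], 1: [0], 2: [0]}

def revsfn(start):
    return range(start, 3)

assert headrevssubset(revsfn, parentmap.__getitem__) == {1, 2}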
1075 def linearize(revs, parentsfn):
1106 def linearize(revs, parentsfn):
1076 """Linearize and topologically sort a list of revisions.
1107 """Linearize and topologically sort a list of revisions.
1077
1108
1078 The linearization process tries to create long runs of revs where a child
1109 The linearization process tries to create long runs of revs where a child
1079 rev comes immediately after its first parent. This is done by visiting the
1110 rev comes immediately after its first parent. This is done by visiting the
1080 heads of the revs in inverse topological order, and for each visited rev,
1111 heads of the revs in inverse topological order, and for each visited rev,
1081 visiting its second parent, then its first parent, then adding the rev
1112 visiting its second parent, then its first parent, then adding the rev
1082 itself to the output list.
1113 itself to the output list.
1083
1114
1084 Returns a list of revision numbers.
1115 Returns a list of revision numbers.
1085 """
1116 """
1086 visit = list(sorted(headrevs(revs, parentsfn), reverse=True))
1117 visit = list(sorted(headrevs(revs, parentsfn), reverse=True))
1087 finished = set()
1118 finished = set()
1088 result = []
1119 result = []
1089
1120
1090 while visit:
1121 while visit:
1091 rev = visit.pop()
1122 rev = visit.pop()
1092 if rev < 0:
1123 if rev < 0:
1093 rev = -rev - 1
1124 rev = -rev - 1
1094
1125
1095 if rev not in finished:
1126 if rev not in finished:
1096 result.append(rev)
1127 result.append(rev)
1097 finished.add(rev)
1128 finished.add(rev)
1098
1129
1099 else:
1130 else:
1100 visit.append(-rev - 1)
1131 visit.append(-rev - 1)
1101
1132
1102 for prev in parentsfn(rev):
1133 for prev in parentsfn(rev):
1103 if prev == nullrev or prev not in revs or prev in finished:
1134 if prev == nullrev or prev not in revs or prev in finished:
1104 continue
1135 continue
1105
1136
1106 visit.append(prev)
1137 visit.append(prev)
1107
1138
1108 assert len(result) == len(revs)
1139 assert len(result) == len(revs)
1109
1140
1110 return result
1141 return result
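And a matching sketch for linearize (same toy conventions): where possible, a child is emitted immediately after its first parent.

parentmap = {0: [-1], 1: [0], 2: [0], 3: [1, 2]}
assert linearize({0, 1, 2, 3}, parentmap.__getitem__) == [0, 2, 1, 3]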
@@ -1,4065 +1,4077 b''
1 # revlog.py - storage back-end for mercurial
1 # revlog.py - storage back-end for mercurial
2 # coding: utf8
2 # coding: utf8
3 #
3 #
4 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
4 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
5 #
5 #
6 # This software may be used and distributed according to the terms of the
6 # This software may be used and distributed according to the terms of the
7 # GNU General Public License version 2 or any later version.
7 # GNU General Public License version 2 or any later version.
8
8
9 """Storage back-end for Mercurial.
9 """Storage back-end for Mercurial.
10
10
11 This provides efficient delta storage with O(1) retrieve and append
11 This provides efficient delta storage with O(1) retrieve and append
12 and O(changes) merge between branches.
12 and O(changes) merge between branches.
13 """
13 """
14
14
15
15
16 import binascii
16 import binascii
17 import collections
17 import collections
18 import contextlib
18 import contextlib
19 import functools
19 import functools
20 import io
20 import io
21 import os
21 import os
22 import struct
22 import struct
23 import weakref
23 import weakref
24 import zlib
24 import zlib
25
25
26 # import stuff from node for others to import from revlog
26 # import stuff from node for others to import from revlog
27 from .node import (
27 from .node import (
28 bin,
28 bin,
29 hex,
29 hex,
30 nullrev,
30 nullrev,
31 sha1nodeconstants,
31 sha1nodeconstants,
32 short,
32 short,
33 wdirrev,
33 wdirrev,
34 )
34 )
35 from .i18n import _
35 from .i18n import _
36 from .revlogutils.constants import (
36 from .revlogutils.constants import (
37 ALL_KINDS,
37 ALL_KINDS,
38 CHANGELOGV2,
38 CHANGELOGV2,
39 COMP_MODE_DEFAULT,
39 COMP_MODE_DEFAULT,
40 COMP_MODE_INLINE,
40 COMP_MODE_INLINE,
41 COMP_MODE_PLAIN,
41 COMP_MODE_PLAIN,
42 DELTA_BASE_REUSE_NO,
42 DELTA_BASE_REUSE_NO,
43 DELTA_BASE_REUSE_TRY,
43 DELTA_BASE_REUSE_TRY,
44 ENTRY_RANK,
44 ENTRY_RANK,
45 FEATURES_BY_VERSION,
45 FEATURES_BY_VERSION,
46 FLAG_GENERALDELTA,
46 FLAG_GENERALDELTA,
47 FLAG_INLINE_DATA,
47 FLAG_INLINE_DATA,
48 INDEX_HEADER,
48 INDEX_HEADER,
49 KIND_CHANGELOG,
49 KIND_CHANGELOG,
50 KIND_FILELOG,
50 KIND_FILELOG,
51 RANK_UNKNOWN,
51 RANK_UNKNOWN,
52 REVLOGV0,
52 REVLOGV0,
53 REVLOGV1,
53 REVLOGV1,
54 REVLOGV1_FLAGS,
54 REVLOGV1_FLAGS,
55 REVLOGV2,
55 REVLOGV2,
56 REVLOGV2_FLAGS,
56 REVLOGV2_FLAGS,
57 REVLOG_DEFAULT_FLAGS,
57 REVLOG_DEFAULT_FLAGS,
58 REVLOG_DEFAULT_FORMAT,
58 REVLOG_DEFAULT_FORMAT,
59 REVLOG_DEFAULT_VERSION,
59 REVLOG_DEFAULT_VERSION,
60 SUPPORTED_FLAGS,
60 SUPPORTED_FLAGS,
61 )
61 )
62 from .revlogutils.flagutil import (
62 from .revlogutils.flagutil import (
63 REVIDX_DEFAULT_FLAGS,
63 REVIDX_DEFAULT_FLAGS,
64 REVIDX_ELLIPSIS,
64 REVIDX_ELLIPSIS,
65 REVIDX_EXTSTORED,
65 REVIDX_EXTSTORED,
66 REVIDX_FLAGS_ORDER,
66 REVIDX_FLAGS_ORDER,
67 REVIDX_HASCOPIESINFO,
67 REVIDX_HASCOPIESINFO,
68 REVIDX_ISCENSORED,
68 REVIDX_ISCENSORED,
69 REVIDX_RAWTEXT_CHANGING_FLAGS,
69 REVIDX_RAWTEXT_CHANGING_FLAGS,
70 )
70 )
71 from .thirdparty import attr
71 from .thirdparty import attr
72 from . import (
72 from . import (
73 ancestor,
73 ancestor,
74 dagop,
74 dagop,
75 error,
75 error,
76 mdiff,
76 mdiff,
77 policy,
77 policy,
78 pycompat,
78 pycompat,
79 revlogutils,
79 revlogutils,
80 templatefilters,
80 templatefilters,
81 util,
81 util,
82 )
82 )
83 from .interfaces import (
83 from .interfaces import (
84 repository,
84 repository,
85 util as interfaceutil,
85 util as interfaceutil,
86 )
86 )
87 from .revlogutils import (
87 from .revlogutils import (
88 deltas as deltautil,
88 deltas as deltautil,
89 docket as docketutil,
89 docket as docketutil,
90 flagutil,
90 flagutil,
91 nodemap as nodemaputil,
91 nodemap as nodemaputil,
92 randomaccessfile,
92 randomaccessfile,
93 revlogv0,
93 revlogv0,
94 rewrite,
94 rewrite,
95 sidedata as sidedatautil,
95 sidedata as sidedatautil,
96 )
96 )
97 from .utils import (
97 from .utils import (
98 storageutil,
98 storageutil,
99 stringutil,
99 stringutil,
100 )
100 )
101
101
102 # bare usage of all the names below prevents pyflakes "unused import" warnings
102 # bare usage of all the names below prevents pyflakes "unused import" warnings
103 # We need these names available in the module for extensions.
103 # We need these names available in the module for extensions.
104
104
105 REVLOGV0
105 REVLOGV0
106 REVLOGV1
106 REVLOGV1
107 REVLOGV2
107 REVLOGV2
108 CHANGELOGV2
108 CHANGELOGV2
109 FLAG_INLINE_DATA
109 FLAG_INLINE_DATA
110 FLAG_GENERALDELTA
110 FLAG_GENERALDELTA
111 REVLOG_DEFAULT_FLAGS
111 REVLOG_DEFAULT_FLAGS
112 REVLOG_DEFAULT_FORMAT
112 REVLOG_DEFAULT_FORMAT
113 REVLOG_DEFAULT_VERSION
113 REVLOG_DEFAULT_VERSION
114 REVLOGV1_FLAGS
114 REVLOGV1_FLAGS
115 REVLOGV2_FLAGS
115 REVLOGV2_FLAGS
116 REVIDX_ISCENSORED
116 REVIDX_ISCENSORED
117 REVIDX_ELLIPSIS
117 REVIDX_ELLIPSIS
118 REVIDX_HASCOPIESINFO
118 REVIDX_HASCOPIESINFO
119 REVIDX_EXTSTORED
119 REVIDX_EXTSTORED
120 REVIDX_DEFAULT_FLAGS
120 REVIDX_DEFAULT_FLAGS
121 REVIDX_FLAGS_ORDER
121 REVIDX_FLAGS_ORDER
122 REVIDX_RAWTEXT_CHANGING_FLAGS
122 REVIDX_RAWTEXT_CHANGING_FLAGS
123
123
124 parsers = policy.importmod('parsers')
124 parsers = policy.importmod('parsers')
125 rustancestor = policy.importrust('ancestor')
125 rustancestor = policy.importrust('ancestor')
126 rustdagop = policy.importrust('dagop')
126 rustdagop = policy.importrust('dagop')
127 rustrevlog = policy.importrust('revlog')
127 rustrevlog = policy.importrust('revlog')
128
128
129 # Aliased for performance.
129 # Aliased for performance.
130 _zlibdecompress = zlib.decompress
130 _zlibdecompress = zlib.decompress
131
131
132 # max size of inline data embedded into a revlog
132 # max size of inline data embedded into a revlog
133 _maxinline = 131072
133 _maxinline = 131072
134
134
135
135 # Flag processors for REVIDX_ELLIPSIS.
136 # Flag processors for REVIDX_ELLIPSIS.
136 def ellipsisreadprocessor(rl, text):
137 def ellipsisreadprocessor(rl, text):
137 return text, False
138 return text, False
138
139
139
140
140 def ellipsiswriteprocessor(rl, text):
141 def ellipsiswriteprocessor(rl, text):
141 return text, False
142 return text, False
142
143
143
144
144 def ellipsisrawprocessor(rl, text):
145 def ellipsisrawprocessor(rl, text):
145 return False
146 return False
146
147
147
148
148 ellipsisprocessor = (
149 ellipsisprocessor = (
149 ellipsisreadprocessor,
150 ellipsisreadprocessor,
150 ellipsiswriteprocessor,
151 ellipsiswriteprocessor,
151 ellipsisrawprocessor,
152 ellipsisrawprocessor,
152 )
153 )
153
154
154
155
155 def _verify_revision(rl, skipflags, state, node):
156 def _verify_revision(rl, skipflags, state, node):
156 """Verify the integrity of the given revlog ``node`` while providing a hook
157 """Verify the integrity of the given revlog ``node`` while providing a hook
157 point for extensions to influence the operation."""
158 point for extensions to influence the operation."""
158 if skipflags:
159 if skipflags:
159 state[b'skipread'].add(node)
160 state[b'skipread'].add(node)
160 else:
161 else:
161 # Side-effect: read content and verify hash.
162 # Side-effect: read content and verify hash.
162 rl.revision(node)
163 rl.revision(node)
163
164
164
165
165 # True if a fast implementation for persistent-nodemap is available
166 # True if a fast implementation for persistent-nodemap is available
166 #
167 #
167 # We also consider that we have a "fast" implementation in "pure" python because
168 # We also consider that we have a "fast" implementation in "pure" python because
168 # people using pure don't really have performance considerations (and a
169 # people using pure don't really have performance considerations (and a
169 # wheelbarrow of other slowness sources)
170 # wheelbarrow of other slowness sources)
170 HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or hasattr(
171 HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or hasattr(
171 parsers, 'BaseIndexObject'
172 parsers, 'BaseIndexObject'
172 )
173 )
173
174
174
175
175 @interfaceutil.implementer(repository.irevisiondelta)
176 @interfaceutil.implementer(repository.irevisiondelta)
176 @attr.s(slots=True)
177 @attr.s(slots=True)
177 class revlogrevisiondelta:
178 class revlogrevisiondelta:
178 node = attr.ib()
179 node = attr.ib()
179 p1node = attr.ib()
180 p1node = attr.ib()
180 p2node = attr.ib()
181 p2node = attr.ib()
181 basenode = attr.ib()
182 basenode = attr.ib()
182 flags = attr.ib()
183 flags = attr.ib()
183 baserevisionsize = attr.ib()
184 baserevisionsize = attr.ib()
184 revision = attr.ib()
185 revision = attr.ib()
185 delta = attr.ib()
186 delta = attr.ib()
186 sidedata = attr.ib()
187 sidedata = attr.ib()
187 protocol_flags = attr.ib()
188 protocol_flags = attr.ib()
188 linknode = attr.ib(default=None)
189 linknode = attr.ib(default=None)
189
190
190
191
191 @interfaceutil.implementer(repository.iverifyproblem)
192 @interfaceutil.implementer(repository.iverifyproblem)
192 @attr.s(frozen=True)
193 @attr.s(frozen=True)
193 class revlogproblem:
194 class revlogproblem:
194 warning = attr.ib(default=None)
195 warning = attr.ib(default=None)
195 error = attr.ib(default=None)
196 error = attr.ib(default=None)
196 node = attr.ib(default=None)
197 node = attr.ib(default=None)
197
198
198
199
199 def parse_index_v1(data, inline):
200 def parse_index_v1(data, inline):
200 # call the C implementation to parse the index data
201 # call the C implementation to parse the index data
201 index, cache = parsers.parse_index2(data, inline)
202 index, cache = parsers.parse_index2(data, inline)
202 return index, cache
203 return index, cache
203
204
204
205
205 def parse_index_v2(data, inline):
206 def parse_index_v2(data, inline):
206 # call the C implementation to parse the index data
207 # call the C implementation to parse the index data
207 index, cache = parsers.parse_index2(data, inline, format=REVLOGV2)
208 index, cache = parsers.parse_index2(data, inline, format=REVLOGV2)
208 return index, cache
209 return index, cache
209
210
210
211
211 def parse_index_cl_v2(data, inline):
212 def parse_index_cl_v2(data, inline):
212 # call the C implementation to parse the index data
213 # call the C implementation to parse the index data
213 index, cache = parsers.parse_index2(data, inline, format=CHANGELOGV2)
214 index, cache = parsers.parse_index2(data, inline, format=CHANGELOGV2)
214 return index, cache
215 return index, cache
215
216
216
217
217 if hasattr(parsers, 'parse_index_devel_nodemap'):
218 if hasattr(parsers, 'parse_index_devel_nodemap'):
218
219
219 def parse_index_v1_nodemap(data, inline):
220 def parse_index_v1_nodemap(data, inline):
220 index, cache = parsers.parse_index_devel_nodemap(data, inline)
221 index, cache = parsers.parse_index_devel_nodemap(data, inline)
221 return index, cache
222 return index, cache
222
223
223
224
224 else:
225 else:
225 parse_index_v1_nodemap = None
226 parse_index_v1_nodemap = None
226
227
227
228
228 def parse_index_v1_rust(data, inline, default_header):
229 def parse_index_v1_rust(data, inline, default_header):
229 cache = (0, data) if inline else None
230 cache = (0, data) if inline else None
230 return rustrevlog.Index(data, default_header), cache
231 return rustrevlog.Index(data, default_header), cache
231
232
232
233
233 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
234 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
234 # signed integer)
235 # signed integer)
235 _maxentrysize = 0x7FFFFFFF
236 _maxentrysize = 0x7FFFFFFF
236
237
237 FILE_TOO_SHORT_MSG = _(
238 FILE_TOO_SHORT_MSG = _(
238 b'cannot read from revlog %s;'
239 b'cannot read from revlog %s;'
239 b' expected %d bytes from offset %d, data size is %d'
240 b' expected %d bytes from offset %d, data size is %d'
240 )
241 )
241
242
242 hexdigits = b'0123456789abcdefABCDEF'
243 hexdigits = b'0123456789abcdefABCDEF'
243
244
244
245
245 class _Config:
246 class _Config:
246 def copy(self):
247 def copy(self):
247 return self.__class__(**self.__dict__)
248 return self.__class__(**self.__dict__)
248
249
249
250
250 @attr.s()
251 @attr.s()
251 class FeatureConfig(_Config):
252 class FeatureConfig(_Config):
252 """Hold configuration values about the available revlog features"""
253 """Hold configuration values about the available revlog features"""
253
254
254 # the default compression engine
255 # the default compression engine
255 compression_engine = attr.ib(default=b'zlib')
256 compression_engine = attr.ib(default=b'zlib')
256 # compression engines options
257 # compression engines options
257 compression_engine_options = attr.ib(default=attr.Factory(dict))
258 compression_engine_options = attr.ib(default=attr.Factory(dict))
258
259
259 # can we use censor on this revlog
260 # can we use censor on this revlog
260 censorable = attr.ib(default=False)
261 censorable = attr.ib(default=False)
261 # does this revlog use the "side data" feature
262 # does this revlog use the "side data" feature
262 has_side_data = attr.ib(default=False)
263 has_side_data = attr.ib(default=False)
263 # might remove rank configuration once the computation has no impact
264 # might remove rank configuration once the computation has no impact
264 compute_rank = attr.ib(default=False)
265 compute_rank = attr.ib(default=False)
265 # parent order is supposed to be semantically irrelevant, so we
266 # parent order is supposed to be semantically irrelevant, so we
266 # normally resort parents to ensure that the first parent is non-null,
267 # normally resort parents to ensure that the first parent is non-null,
267 # if there is a non-null parent at all.
268 # if there is a non-null parent at all.
268 # filelog abuses the parent order as a flag to mark some instances of
269 # filelog abuses the parent order as a flag to mark some instances of
269 # meta-encoded files, so allow it to disable this behavior.
270 # meta-encoded files, so allow it to disable this behavior.
270 canonical_parent_order = attr.ib(default=False)
271 canonical_parent_order = attr.ib(default=False)
271 # can ellipsis commit be used
272 # can ellipsis commit be used
272 enable_ellipsis = attr.ib(default=False)
273 enable_ellipsis = attr.ib(default=False)
273
274
274 def copy(self):
275 def copy(self):
275 new = super().copy()
276 new = super().copy()
276 new.compression_engine_options = self.compression_engine_options.copy()
277 new.compression_engine_options = self.compression_engine_options.copy()
277 return new
278 return new
278
279
279
280
280 @attr.s()
281 @attr.s()
281 class DataConfig(_Config):
282 class DataConfig(_Config):
282 """Hold configuration value about how the revlog data are read"""
283 """Hold configuration value about how the revlog data are read"""
283
284
284 # should we try to open the "pending" version of the revlog
285 # should we try to open the "pending" version of the revlog
285 try_pending = attr.ib(default=False)
286 try_pending = attr.ib(default=False)
286 # should we try to open the "split" version of the revlog
287 # should we try to open the "split" version of the revlog
287 try_split = attr.ib(default=False)
288 try_split = attr.ib(default=False)
288 # When True, indexfile should be opened with checkambig=True at writing,
289 # When True, indexfile should be opened with checkambig=True at writing,
289 # to avoid file stat ambiguity.
290 # to avoid file stat ambiguity.
290 check_ambig = attr.ib(default=False)
291 check_ambig = attr.ib(default=False)
291
292
292 # If true, use mmap instead of reading to deal with large index
293 # If true, use mmap instead of reading to deal with large index
293 mmap_large_index = attr.ib(default=False)
294 mmap_large_index = attr.ib(default=False)
294 # the index size above which it is considered large
295 # the index size above which it is considered large
295 mmap_index_threshold = attr.ib(default=None)
296 mmap_index_threshold = attr.ib(default=None)
296 # How much data to read and cache into the raw revlog data cache.
297 # How much data to read and cache into the raw revlog data cache.
297 chunk_cache_size = attr.ib(default=65536)
298 chunk_cache_size = attr.ib(default=65536)
298
299
299 # The size of the uncompressed cache compared to the largest revision seen.
300 # The size of the uncompressed cache compared to the largest revision seen.
300 uncompressed_cache_factor = attr.ib(default=None)
301 uncompressed_cache_factor = attr.ib(default=None)
301
302
302 # The number of chunks cached
303 # The number of chunks cached
303 uncompressed_cache_count = attr.ib(default=None)
304 uncompressed_cache_count = attr.ib(default=None)
304
305
305 # Allow sparse reading of the revlog data
306 # Allow sparse reading of the revlog data
306 with_sparse_read = attr.ib(default=False)
307 with_sparse_read = attr.ib(default=False)
307 # minimal density of a sparse read chunk
308 # minimal density of a sparse read chunk
308 sr_density_threshold = attr.ib(default=0.50)
309 sr_density_threshold = attr.ib(default=0.50)
309 # minimal size of data we skip when performing sparse read
310 # minimal size of data we skip when performing sparse read
310 sr_min_gap_size = attr.ib(default=262144)
311 sr_min_gap_size = attr.ib(default=262144)
311
312
312 # are deltas encoded against arbitrary bases.
313 # are deltas encoded against arbitrary bases.
313 generaldelta = attr.ib(default=False)
314 generaldelta = attr.ib(default=False)
314
315
315
316
316 @attr.s()
317 @attr.s()
317 class DeltaConfig(_Config):
318 class DeltaConfig(_Config):
318 """Hold configuration value about how new delta are computed
319 """Hold configuration value about how new delta are computed
319
320
320 Some attributes are duplicated from DataConfig to help havign each object
321 Some attributes are duplicated from DataConfig to help havign each object
321 self contained.
322 self contained.
322 """
323 """
323
324
324 # can deltas be encoded against arbitrary bases.
325 # can deltas be encoded against arbitrary bases.
325 general_delta = attr.ib(default=False)
326 general_delta = attr.ib(default=False)
326 # Allow sparse writing of the revlog data
327 # Allow sparse writing of the revlog data
327 sparse_revlog = attr.ib(default=False)
328 sparse_revlog = attr.ib(default=False)
328 # maximum length of a delta chain
329 # maximum length of a delta chain
329 max_chain_len = attr.ib(default=None)
330 max_chain_len = attr.ib(default=None)
330 # Maximum distance between delta chain base start and end
331 # Maximum distance between delta chain base start and end
331 max_deltachain_span = attr.ib(default=-1)
332 max_deltachain_span = attr.ib(default=-1)
332 # If `upper_bound_comp` is not None, this is the expected maximal gain from
333 # If `upper_bound_comp` is not None, this is the expected maximal gain from
333 # compression for the data content.
334 # compression for the data content.
334 upper_bound_comp = attr.ib(default=None)
335 upper_bound_comp = attr.ib(default=None)
335 # Should we try a delta against both parents
336 # Should we try a delta against both parents
336 delta_both_parents = attr.ib(default=True)
337 delta_both_parents = attr.ib(default=True)
337 # Test delta base candidate groups in chunks of this maximal size.
338 # Test delta base candidate groups in chunks of this maximal size.
338 candidate_group_chunk_size = attr.ib(default=0)
339 candidate_group_chunk_size = attr.ib(default=0)
339 # Should we display debug information about delta computation
340 # Should we display debug information about delta computation
340 debug_delta = attr.ib(default=False)
341 debug_delta = attr.ib(default=False)
341 # trust incoming delta by default
342 # trust incoming delta by default
342 lazy_delta = attr.ib(default=True)
343 lazy_delta = attr.ib(default=True)
343 # trust the base of incoming delta by default
344 # trust the base of incoming delta by default
344 lazy_delta_base = attr.ib(default=False)
345 lazy_delta_base = attr.ib(default=False)
345
346
346
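# Illustrative sketch, not part of the original module: FeatureConfig,
# DataConfig and DeltaConfig are plain attrs value objects, and _Config.copy()
# rebuilds an instance from __dict__, so copies are shallow unless a subclass
# overrides copy() the way FeatureConfig does for its options dict. The helper
# below exists only for this example.
def _example_tuned_delta_config():
    base = DeltaConfig(general_delta=True, sparse_revlog=True)
    tuned = base.copy()
    # adjusting the copy leaves `base` untouched
    tuned.max_chain_len = 1000
    return base, tuned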
347
347 class _InnerRevlog:
348 class _InnerRevlog:
348 """An inner layer of the revlog object
349 """An inner layer of the revlog object
349
350
350 That layer exists to be able to delegate some operations to Rust; its
351 That layer exists to be able to delegate some operations to Rust; its
351 boundaries are arbitrary and based on what we can delegate to Rust.
352 boundaries are arbitrary and based on what we can delegate to Rust.
352 """
353 """
353
354
354 def __init__(
355 def __init__(
355 self,
356 self,
356 opener,
357 opener,
357 index,
358 index,
358 index_file,
359 index_file,
359 data_file,
360 data_file,
360 sidedata_file,
361 sidedata_file,
361 inline,
362 inline,
362 data_config,
363 data_config,
363 delta_config,
364 delta_config,
364 feature_config,
365 feature_config,
365 chunk_cache,
366 chunk_cache,
366 default_compression_header,
367 default_compression_header,
367 ):
368 ):
368 self.opener = opener
369 self.opener = opener
369 self.index = index
370 self.index = index
370
371
371 self.index_file = index_file
372 self.index_file = index_file
372 self.data_file = data_file
373 self.data_file = data_file
373 self.sidedata_file = sidedata_file
374 self.sidedata_file = sidedata_file
374 self.inline = inline
375 self.inline = inline
375 self.data_config = data_config
376 self.data_config = data_config
376 self.delta_config = delta_config
377 self.delta_config = delta_config
377 self.feature_config = feature_config
378 self.feature_config = feature_config
378
379
379 # used during diverted write.
380 # used during diverted write.
380 self._orig_index_file = None
381 self._orig_index_file = None
381
382
382 self._default_compression_header = default_compression_header
383 self._default_compression_header = default_compression_header
383
384
384 # index
385 # index
385
386
386 # 3-tuple of file handles being used for active writing.
387 # 3-tuple of file handles being used for active writing.
387 self._writinghandles = None
388 self._writinghandles = None
388
389
389 self._segmentfile = randomaccessfile.randomaccessfile(
390 self._segmentfile = randomaccessfile.randomaccessfile(
390 self.opener,
391 self.opener,
391 (self.index_file if self.inline else self.data_file),
392 (self.index_file if self.inline else self.data_file),
392 self.data_config.chunk_cache_size,
393 self.data_config.chunk_cache_size,
393 chunk_cache,
394 chunk_cache,
394 )
395 )
395 self._segmentfile_sidedata = randomaccessfile.randomaccessfile(
396 self._segmentfile_sidedata = randomaccessfile.randomaccessfile(
396 self.opener,
397 self.opener,
397 self.sidedata_file,
398 self.sidedata_file,
398 self.data_config.chunk_cache_size,
399 self.data_config.chunk_cache_size,
399 )
400 )
400
401
401 # revlog header -> revlog compressor
402 # revlog header -> revlog compressor
402 self._decompressors = {}
403 self._decompressors = {}
403 # 3-tuple of (node, rev, text) for a raw revision.
404 # 3-tuple of (node, rev, text) for a raw revision.
404 self._revisioncache = None
405 self._revisioncache = None
405
406
406 # cache some uncompressed chunks
407 # cache some uncompressed chunks
407 # rev → uncompressed_chunk
408 # rev → uncompressed_chunk
408 #
409 #
409 # the max cost is dynamically updated to be proportional to the
410 # the max cost is dynamically updated to be proportional to the
410 # size of the revisions we actually encounter.
411 # size of the revisions we actually encounter.
411 self._uncompressed_chunk_cache = None
412 self._uncompressed_chunk_cache = None
412 if self.data_config.uncompressed_cache_factor is not None:
413 if self.data_config.uncompressed_cache_factor is not None:
413 self._uncompressed_chunk_cache = util.lrucachedict(
414 self._uncompressed_chunk_cache = util.lrucachedict(
414 self.data_config.uncompressed_cache_count,
415 self.data_config.uncompressed_cache_count,
415 maxcost=65536, # some arbitrary initial value
416 maxcost=65536, # some arbitrary initial value
416 )
417 )
417
418
418 self._delay_buffer = None
419 self._delay_buffer = None
419
420
420 def __len__(self):
421 def __len__(self):
421 return len(self.index)
422 return len(self.index)
422
423
423 def clear_cache(self):
424 def clear_cache(self):
424 assert not self.is_delaying
425 assert not self.is_delaying
425 self._revisioncache = None
426 self._revisioncache = None
426 if self._uncompressed_chunk_cache is not None:
427 if self._uncompressed_chunk_cache is not None:
427 self._uncompressed_chunk_cache.clear()
428 self._uncompressed_chunk_cache.clear()
428 self._segmentfile.clear_cache()
429 self._segmentfile.clear_cache()
429 self._segmentfile_sidedata.clear_cache()
430 self._segmentfile_sidedata.clear_cache()
430
431
431 @property
432 @property
432 def canonical_index_file(self):
433 def canonical_index_file(self):
433 if self._orig_index_file is not None:
434 if self._orig_index_file is not None:
434 return self._orig_index_file
435 return self._orig_index_file
435 return self.index_file
436 return self.index_file
436
437
437 @property
438 @property
438 def is_delaying(self):
439 def is_delaying(self):
439 """is the revlog is currently delaying the visibility of written data?
440 """is the revlog is currently delaying the visibility of written data?
440
441
441 The delaying mechanism can be either in-memory or written on disk in a
442 The delaying mechanism can be either in-memory or written on disk in a
442 side-file."""
443 side-file."""
443 return (self._delay_buffer is not None) or (
444 return (self._delay_buffer is not None) or (
444 self._orig_index_file is not None
445 self._orig_index_file is not None
445 )
446 )
446
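# The delaying machinery tested above works in one of two mutually exclusive
# modes: either new index entries are buffered in memory (`_delay_buffer`,
# filled by `write_entry` and flushed by `write_pending`/`finalize_pending`),
# or, for a revlog that was still empty when `delay()` was called, index
# writes are diverted to a separate `<index>.a` file while `_orig_index_file`
# remembers the real index. `finalize_pending()` asserts that both modes are
# never active at the same time.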
447
447 # Derived from index values.
448 # Derived from index values.
448
449
449 def start(self, rev):
450 def start(self, rev):
450 """the offset of the data chunk for this revision"""
451 """the offset of the data chunk for this revision"""
451 return int(self.index[rev][0] >> 16)
452 return int(self.index[rev][0] >> 16)
452
453
453 def length(self, rev):
454 def length(self, rev):
454 """the length of the data chunk for this revision"""
455 """the length of the data chunk for this revision"""
455 return self.index[rev][1]
456 return self.index[rev][1]
456
457
457 def end(self, rev):
458 def end(self, rev):
458 """the end of the data chunk for this revision"""
459 """the end of the data chunk for this revision"""
459 return self.start(rev) + self.length(rev)
460 return self.start(rev) + self.length(rev)
460
461
461 def deltaparent(self, rev):
462 def deltaparent(self, rev):
462 """return deltaparent of the given revision"""
463 """return deltaparent of the given revision"""
463 base = self.index[rev][3]
464 base = self.index[rev][3]
464 if base == rev:
465 if base == rev:
465 return nullrev
466 return nullrev
466 elif self.delta_config.general_delta:
467 elif self.delta_config.general_delta:
467 return base
468 return base
468 else:
469 else:
469 return rev - 1
470 return rev - 1
470
471
471 def issnapshot(self, rev):
472 def issnapshot(self, rev):
472 """tells whether rev is a snapshot"""
473 """tells whether rev is a snapshot"""
473 if not self.delta_config.sparse_revlog:
474 if not self.delta_config.sparse_revlog:
474 return self.deltaparent(rev) == nullrev
475 return self.deltaparent(rev) == nullrev
475 elif hasattr(self.index, 'issnapshot'):
476 elif hasattr(self.index, 'issnapshot'):
476 # directly assign the method to cache the testing and access
477 # directly assign the method to cache the testing and access
477 self.issnapshot = self.index.issnapshot
478 self.issnapshot = self.index.issnapshot
478 return self.issnapshot(rev)
479 return self.issnapshot(rev)
479 if rev == nullrev:
480 if rev == nullrev:
480 return True
481 return True
481 entry = self.index[rev]
482 entry = self.index[rev]
482 base = entry[3]
483 base = entry[3]
483 if base == rev:
484 if base == rev:
484 return True
485 return True
485 if base == nullrev:
486 if base == nullrev:
486 return True
487 return True
487 p1 = entry[5]
488 p1 = entry[5]
488 while self.length(p1) == 0:
489 while self.length(p1) == 0:
489 b = self.deltaparent(p1)
490 b = self.deltaparent(p1)
490 if b == p1:
491 if b == p1:
491 break
492 break
492 p1 = b
493 p1 = b
493 p2 = entry[6]
494 p2 = entry[6]
494 while self.length(p2) == 0:
495 while self.length(p2) == 0:
495 b = self.deltaparent(p2)
496 b = self.deltaparent(p2)
496 if b == p2:
497 if b == p2:
497 break
498 break
498 p2 = b
499 p2 = b
499 if base == p1 or base == p2:
500 if base == p1 or base == p2:
500 return False
501 return False
501 return self.issnapshot(base)
502 return self.issnapshot(base)
502
503
503 def _deltachain(self, rev, stoprev=None):
504 def _deltachain(self, rev, stoprev=None):
504 """Obtain the delta chain for a revision.
505 """Obtain the delta chain for a revision.
505
506
506 ``stoprev`` specifies a revision to stop at. If not specified, we
507 ``stoprev`` specifies a revision to stop at. If not specified, we
507 stop at the base of the chain.
508 stop at the base of the chain.
508
509
509 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
510 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
510 revs in ascending order and ``stopped`` is a bool indicating whether
511 revs in ascending order and ``stopped`` is a bool indicating whether
511 ``stoprev`` was hit.
512 ``stoprev`` was hit.
512 """
513 """
513 generaldelta = self.delta_config.general_delta
514 generaldelta = self.delta_config.general_delta
514 # Try C implementation.
515 # Try C implementation.
515 try:
516 try:
516 return self.index.deltachain(rev, stoprev, generaldelta)
517 return self.index.deltachain(rev, stoprev, generaldelta)
517 except AttributeError:
518 except AttributeError:
518 pass
519 pass
519
520
520 chain = []
521 chain = []
521
522
522 # Alias to prevent attribute lookup in tight loop.
523 # Alias to prevent attribute lookup in tight loop.
523 index = self.index
524 index = self.index
524
525
525 iterrev = rev
526 iterrev = rev
526 e = index[iterrev]
527 e = index[iterrev]
527 while iterrev != e[3] and iterrev != stoprev:
528 while iterrev != e[3] and iterrev != stoprev:
528 chain.append(iterrev)
529 chain.append(iterrev)
529 if generaldelta:
530 if generaldelta:
530 iterrev = e[3]
531 iterrev = e[3]
531 else:
532 else:
532 iterrev -= 1
533 iterrev -= 1
533 e = index[iterrev]
534 e = index[iterrev]
534
535
535 if iterrev == stoprev:
536 if iterrev == stoprev:
536 stopped = True
537 stopped = True
537 else:
538 else:
538 chain.append(iterrev)
539 chain.append(iterrev)
539 stopped = False
540 stopped = False
540
541
541 chain.reverse()
542 chain.reverse()
542 return chain, stopped
543 return chain, stopped
543
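# Worked example for _deltachain: with general delta enabled, if rev 7 is
# stored as a delta against rev 5, rev 5 against rev 2, and rev 2 is a full
# snapshot (its delta base is itself), then _deltachain(7) returns
# ([2, 5, 7], False). With stoprev=5 it returns ([7], True): the stop
# revision itself is excluded because its text is expected to come from the
# caller's cache (see raw_text below).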
544
544 @util.propertycache
545 @util.propertycache
545 def _compressor(self):
546 def _compressor(self):
546 engine = util.compengines[self.feature_config.compression_engine]
547 engine = util.compengines[self.feature_config.compression_engine]
547 return engine.revlogcompressor(
548 return engine.revlogcompressor(
548 self.feature_config.compression_engine_options
549 self.feature_config.compression_engine_options
549 )
550 )
550
551
551 @util.propertycache
552 @util.propertycache
552 def _decompressor(self):
553 def _decompressor(self):
553 """the default decompressor"""
554 """the default decompressor"""
554 if self._default_compression_header is None:
555 if self._default_compression_header is None:
555 return None
556 return None
556 t = self._default_compression_header
557 t = self._default_compression_header
557 c = self._get_decompressor(t)
558 c = self._get_decompressor(t)
558 return c.decompress
559 return c.decompress
559
560
560 def _get_decompressor(self, t):
561 def _get_decompressor(self, t):
561 try:
562 try:
562 compressor = self._decompressors[t]
563 compressor = self._decompressors[t]
563 except KeyError:
564 except KeyError:
564 try:
565 try:
565 engine = util.compengines.forrevlogheader(t)
566 engine = util.compengines.forrevlogheader(t)
566 compressor = engine.revlogcompressor(
567 compressor = engine.revlogcompressor(
567 self.feature_config.compression_engine_options
568 self.feature_config.compression_engine_options
568 )
569 )
569 self._decompressors[t] = compressor
570 self._decompressors[t] = compressor
570 except KeyError:
571 except KeyError:
571 raise error.RevlogError(
572 raise error.RevlogError(
572 _(b'unknown compression type %s') % binascii.hexlify(t)
573 _(b'unknown compression type %s') % binascii.hexlify(t)
573 )
574 )
574 return compressor
575 return compressor
575
576
576 def compress(self, data):
577 def compress(self, data):
577 """Generate a possibly-compressed representation of data."""
578 """Generate a possibly-compressed representation of data."""
578 if not data:
579 if not data:
579 return b'', data
580 return b'', data
580
581
581 compressed = self._compressor.compress(data)
582 compressed = self._compressor.compress(data)
582
583
583 if compressed:
584 if compressed:
584 # The revlog compressor added the header in the returned data.
585 # The revlog compressor added the header in the returned data.
585 return b'', compressed
586 return b'', compressed
586
587
587 if data[0:1] == b'\0':
588 if data[0:1] == b'\0':
588 return b'', data
589 return b'', data
589 return b'u', data
590 return b'u', data
590
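# The single-byte prefix returned by compress() doubles as the chunk header
# consumed by decompress(): an engine-provided header (for instance b'x' for
# zlib) means the payload is compressed, b'u' means the data is stored
# verbatim after the marker, and data beginning with b'\0' is stored as-is
# with no marker at all, since that byte cannot be confused with a
# compression header.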
591
591 def decompress(self, data):
592 def decompress(self, data):
592 """Decompress a revlog chunk.
593 """Decompress a revlog chunk.
593
594
594 The chunk is expected to begin with a header identifying the
595 The chunk is expected to begin with a header identifying the
595 format type so it can be routed to an appropriate decompressor.
596 format type so it can be routed to an appropriate decompressor.
596 """
597 """
597 if not data:
598 if not data:
598 return data
599 return data
599
600
600 # Revlogs are read much more frequently than they are written and many
601 # Revlogs are read much more frequently than they are written and many
601 # chunks only take microseconds to decompress, so performance is
602 # chunks only take microseconds to decompress, so performance is
602 # important here.
603 # important here.
603 #
604 #
604 # We can make a few assumptions about revlogs:
605 # We can make a few assumptions about revlogs:
605 #
606 #
606 # 1) the majority of chunks will be compressed (as opposed to inline
607 # 1) the majority of chunks will be compressed (as opposed to inline
607 # raw data).
608 # raw data).
608 # 2) decompressing *any* data will likely be at least 10x slower than
609 # 2) decompressing *any* data will likely be at least 10x slower than
609 # returning raw inline data.
610 # returning raw inline data.
610 # 3) we want to prioritize common and officially supported compression
611 # 3) we want to prioritize common and officially supported compression
611 # engines
612 # engines
612 #
613 #
613 # It follows that we want to optimize for "decompress compressed data
614 # It follows that we want to optimize for "decompress compressed data
614 # when encoded with common and officially supported compression engines"
615 # when encoded with common and officially supported compression engines"
615 # case over "raw data" and "data encoded by less common or non-official
616 # case over "raw data" and "data encoded by less common or non-official
616 # compression engines." That is why we have the inline lookup first
617 # compression engines." That is why we have the inline lookup first
617 # followed by the compengines lookup.
618 # followed by the compengines lookup.
618 #
619 #
619 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
620 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
620 # compressed chunks. And this matters for changelog and manifest reads.
621 # compressed chunks. And this matters for changelog and manifest reads.
621 t = data[0:1]
622 t = data[0:1]
622
623
623 if t == b'x':
624 if t == b'x':
624 try:
625 try:
625 return _zlibdecompress(data)
626 return _zlibdecompress(data)
626 except zlib.error as e:
627 except zlib.error as e:
627 raise error.RevlogError(
628 raise error.RevlogError(
628 _(b'revlog decompress error: %s')
629 _(b'revlog decompress error: %s')
629 % stringutil.forcebytestr(e)
630 % stringutil.forcebytestr(e)
630 )
631 )
631 # '\0' is more common than 'u' so it goes first.
632 # '\0' is more common than 'u' so it goes first.
632 elif t == b'\0':
633 elif t == b'\0':
633 return data
634 return data
634 elif t == b'u':
635 elif t == b'u':
635 return util.buffer(data, 1)
636 return util.buffer(data, 1)
636
637
637 compressor = self._get_decompressor(t)
638 compressor = self._get_decompressor(t)
638
639
639 return compressor.decompress(data)
640 return compressor.decompress(data)
640
641
641 @contextlib.contextmanager
642 @contextlib.contextmanager
642 def reading(self):
643 def reading(self):
643 """Context manager that keeps data and sidedata files open for reading"""
644 """Context manager that keeps data and sidedata files open for reading"""
644 if len(self.index) == 0:
645 if len(self.index) == 0:
645 yield # nothing to be read
646 yield # nothing to be read
646 elif self._delay_buffer is not None and self.inline:
647 elif self._delay_buffer is not None and self.inline:
647 msg = "revlog with delayed write should not be inline"
648 msg = "revlog with delayed write should not be inline"
648 raise error.ProgrammingError(msg)
649 raise error.ProgrammingError(msg)
649 else:
650 else:
650 with self._segmentfile.reading():
651 with self._segmentfile.reading():
651 with self._segmentfile_sidedata.reading():
652 with self._segmentfile_sidedata.reading():
652 yield
653 yield
653
654
654 @property
655 @property
655 def is_writing(self):
656 def is_writing(self):
656 """True is a writing context is open"""
657 """True is a writing context is open"""
657 return self._writinghandles is not None
658 return self._writinghandles is not None
658
659
659 @property
660 @property
660 def is_open(self):
661 def is_open(self):
661 """True if any file handle is being held
662 """True if any file handle is being held
662
663
663 Used for assert and debug in the python code"""
664 Used for assert and debug in the python code"""
664 return self._segmentfile.is_open or self._segmentfile_sidedata.is_open
665 return self._segmentfile.is_open or self._segmentfile_sidedata.is_open
665
666
666 @contextlib.contextmanager
667 @contextlib.contextmanager
667 def writing(self, transaction, data_end=None, sidedata_end=None):
668 def writing(self, transaction, data_end=None, sidedata_end=None):
668 """Open the revlog files for writing
669 """Open the revlog files for writing
669
670
670 Adding content to a revlog should be done within such a context.
671 Adding content to a revlog should be done within such a context.
671 """
672 """
672 if self.is_writing:
673 if self.is_writing:
673 yield
674 yield
674 else:
675 else:
675 ifh = dfh = sdfh = None
676 ifh = dfh = sdfh = None
676 try:
677 try:
677 r = len(self.index)
678 r = len(self.index)
678 # opening the data file.
679 # opening the data file.
679 dsize = 0
680 dsize = 0
680 if r:
681 if r:
681 dsize = self.end(r - 1)
682 dsize = self.end(r - 1)
682 dfh = None
683 dfh = None
683 if not self.inline:
684 if not self.inline:
684 try:
685 try:
685 dfh = self.opener(self.data_file, mode=b"r+")
686 dfh = self.opener(self.data_file, mode=b"r+")
686 if data_end is None:
687 if data_end is None:
687 dfh.seek(0, os.SEEK_END)
688 dfh.seek(0, os.SEEK_END)
688 else:
689 else:
689 dfh.seek(data_end, os.SEEK_SET)
690 dfh.seek(data_end, os.SEEK_SET)
690 except FileNotFoundError:
691 except FileNotFoundError:
691 dfh = self.opener(self.data_file, mode=b"w+")
692 dfh = self.opener(self.data_file, mode=b"w+")
692 transaction.add(self.data_file, dsize)
693 transaction.add(self.data_file, dsize)
693 if self.sidedata_file is not None:
694 if self.sidedata_file is not None:
694 assert sidedata_end is not None
695 assert sidedata_end is not None
695 # revlog-v2 does not inline, help Pytype
696 # revlog-v2 does not inline, help Pytype
696 assert dfh is not None
697 assert dfh is not None
697 try:
698 try:
698 sdfh = self.opener(self.sidedata_file, mode=b"r+")
699 sdfh = self.opener(self.sidedata_file, mode=b"r+")
699 dfh.seek(sidedata_end, os.SEEK_SET)
700 dfh.seek(sidedata_end, os.SEEK_SET)
700 except FileNotFoundError:
701 except FileNotFoundError:
701 sdfh = self.opener(self.sidedata_file, mode=b"w+")
702 sdfh = self.opener(self.sidedata_file, mode=b"w+")
702 transaction.add(self.sidedata_file, sidedata_end)
703 transaction.add(self.sidedata_file, sidedata_end)
703
704
704 # opening the index file.
705 # opening the index file.
705 isize = r * self.index.entry_size
706 isize = r * self.index.entry_size
706 ifh = self.__index_write_fp()
707 ifh = self.__index_write_fp()
707 if self.inline:
708 if self.inline:
708 transaction.add(self.index_file, dsize + isize)
709 transaction.add(self.index_file, dsize + isize)
709 else:
710 else:
710 transaction.add(self.index_file, isize)
711 transaction.add(self.index_file, isize)
711 # exposing all file handles for writing.
712 # exposing all file handles for writing.
712 self._writinghandles = (ifh, dfh, sdfh)
713 self._writinghandles = (ifh, dfh, sdfh)
713 self._segmentfile.writing_handle = ifh if self.inline else dfh
714 self._segmentfile.writing_handle = ifh if self.inline else dfh
714 self._segmentfile_sidedata.writing_handle = sdfh
715 self._segmentfile_sidedata.writing_handle = sdfh
715 yield
716 yield
716 finally:
717 finally:
717 self._writinghandles = None
718 self._writinghandles = None
718 self._segmentfile.writing_handle = None
719 self._segmentfile.writing_handle = None
719 self._segmentfile_sidedata.writing_handle = None
720 self._segmentfile_sidedata.writing_handle = None
720 if dfh is not None:
721 if dfh is not None:
721 dfh.close()
722 dfh.close()
722 if sdfh is not None:
723 if sdfh is not None:
723 sdfh.close()
724 sdfh.close()
724 # closing the index file last to avoid exposing references to
725 # closing the index file last to avoid exposing references to
725 # potentially unflushed data content.
726 # potentially unflushed data content.
726 if ifh is not None:
727 if ifh is not None:
727 ifh.close()
728 ifh.close()
728
729
729 def __index_write_fp(self, index_end=None):
730 def __index_write_fp(self, index_end=None):
730 """internal method to open the index file for writing
731 """internal method to open the index file for writing
731
732
732 You should not use this directly; use `_writing` instead
733 You should not use this directly; use `_writing` instead
733 """
734 """
734 try:
735 try:
735 if self._delay_buffer is None:
736 if self._delay_buffer is None:
736 f = self.opener(
737 f = self.opener(
737 self.index_file,
738 self.index_file,
738 mode=b"r+",
739 mode=b"r+",
739 checkambig=self.data_config.check_ambig,
740 checkambig=self.data_config.check_ambig,
740 )
741 )
741 else:
742 else:
742 # check_ambig affects the way we open the file for writing; however,
743 # check_ambig affects the way we open the file for writing; however,
743 # here we do not actually open a file for writing, as writes
744 # here we do not actually open a file for writing, as writes
744 # will be appended to a delay_buffer. So check_ambig is not
745 # will be appended to a delay_buffer. So check_ambig is not
745 # meaningful and is unneeded here.
746 # meaningful and is unneeded here.
746 f = randomaccessfile.appender(
747 f = randomaccessfile.appender(
747 self.opener, self.index_file, b"r+", self._delay_buffer
748 self.opener, self.index_file, b"r+", self._delay_buffer
748 )
749 )
749 if index_end is None:
750 if index_end is None:
750 f.seek(0, os.SEEK_END)
751 f.seek(0, os.SEEK_END)
751 else:
752 else:
752 f.seek(index_end, os.SEEK_SET)
753 f.seek(index_end, os.SEEK_SET)
753 return f
754 return f
754 except FileNotFoundError:
755 except FileNotFoundError:
755 if self._delay_buffer is None:
756 if self._delay_buffer is None:
756 return self.opener(
757 return self.opener(
757 self.index_file,
758 self.index_file,
758 mode=b"w+",
759 mode=b"w+",
759 checkambig=self.data_config.check_ambig,
760 checkambig=self.data_config.check_ambig,
760 )
761 )
761 else:
762 else:
762 return randomaccessfile.appender(
763 return randomaccessfile.appender(
763 self.opener, self.index_file, b"w+", self._delay_buffer
764 self.opener, self.index_file, b"w+", self._delay_buffer
764 )
765 )
765
766
766 def __index_new_fp(self):
767 def __index_new_fp(self):
767 """internal method to create a new index file for writing
768 """internal method to create a new index file for writing
768
769
769 You should not use this unless you are upgrading from an inline revlog
770 You should not use this unless you are upgrading from an inline revlog
770 """
771 """
771 return self.opener(
772 return self.opener(
772 self.index_file,
773 self.index_file,
773 mode=b"w",
774 mode=b"w",
774 checkambig=self.data_config.check_ambig,
775 checkambig=self.data_config.check_ambig,
775 )
776 )
776
777
777 def split_inline(self, tr, header, new_index_file_path=None):
778 def split_inline(self, tr, header, new_index_file_path=None):
778 """split the data of an inline revlog into an index and a data file"""
779 """split the data of an inline revlog into an index and a data file"""
779 assert self._delay_buffer is None
780 assert self._delay_buffer is None
780 existing_handles = False
781 existing_handles = False
781 if self._writinghandles is not None:
782 if self._writinghandles is not None:
782 existing_handles = True
783 existing_handles = True
783 fp = self._writinghandles[0]
784 fp = self._writinghandles[0]
784 fp.flush()
785 fp.flush()
785 fp.close()
786 fp.close()
786 # We can't use the cached file handle after close(). So prevent
787 # We can't use the cached file handle after close(). So prevent
787 # its usage.
788 # its usage.
788 self._writinghandles = None
789 self._writinghandles = None
789 self._segmentfile.writing_handle = None
790 self._segmentfile.writing_handle = None
790 # No need to deal with sidedata writing handle as it is only
791 # No need to deal with sidedata writing handle as it is only
791 # relevant with revlog-v2 which is never inline, not reaching
792 # relevant with revlog-v2 which is never inline, not reaching
792 # this code
793 # this code
793
794
794 new_dfh = self.opener(self.data_file, mode=b"w+")
795 new_dfh = self.opener(self.data_file, mode=b"w+")
795 new_dfh.truncate(0) # drop any potentially existing data
796 new_dfh.truncate(0) # drop any potentially existing data
796 try:
797 try:
797 with self.reading():
798 with self.reading():
798 for r in range(len(self.index)):
799 for r in range(len(self.index)):
799 new_dfh.write(self.get_segment_for_revs(r, r)[1])
800 new_dfh.write(self.get_segment_for_revs(r, r)[1])
800 new_dfh.flush()
801 new_dfh.flush()
801
802
802 if new_index_file_path is not None:
803 if new_index_file_path is not None:
803 self.index_file = new_index_file_path
804 self.index_file = new_index_file_path
804 with self.__index_new_fp() as fp:
805 with self.__index_new_fp() as fp:
805 self.inline = False
806 self.inline = False
806 for i in range(len(self.index)):
807 for i in range(len(self.index)):
807 e = self.index.entry_binary(i)
808 e = self.index.entry_binary(i)
808 if i == 0:
809 if i == 0:
809 packed_header = self.index.pack_header(header)
810 packed_header = self.index.pack_header(header)
810 e = packed_header + e
811 e = packed_header + e
811 fp.write(e)
812 fp.write(e)
812
813
813 # If we don't use side-write, the temp file replaces the real
814 # If we don't use side-write, the temp file replaces the real
814 # index when we exit the context manager
815 # index when we exit the context manager
815
816
816 self._segmentfile = randomaccessfile.randomaccessfile(
817 self._segmentfile = randomaccessfile.randomaccessfile(
817 self.opener,
818 self.opener,
818 self.data_file,
819 self.data_file,
819 self.data_config.chunk_cache_size,
820 self.data_config.chunk_cache_size,
820 )
821 )
821
822
822 if existing_handles:
823 if existing_handles:
823 # switched from inline to conventional; reopen the index
824 # switched from inline to conventional; reopen the index
824 ifh = self.__index_write_fp()
825 ifh = self.__index_write_fp()
825 self._writinghandles = (ifh, new_dfh, None)
826 self._writinghandles = (ifh, new_dfh, None)
826 self._segmentfile.writing_handle = new_dfh
827 self._segmentfile.writing_handle = new_dfh
827 new_dfh = None
828 new_dfh = None
828 # No need to deal with sidedata writing handle as it is only
829 # No need to deal with sidedata writing handle as it is only
829 # relevant with revlog-v2 which is never inline, not reaching
830 # relevant with revlog-v2 which is never inline, not reaching
830 # this code
831 # this code
831 finally:
832 finally:
832 if new_dfh is not None:
833 if new_dfh is not None:
833 new_dfh.close()
834 new_dfh.close()
834 return self.index_file
835 return self.index_file
835
836
836 def get_segment_for_revs(self, startrev, endrev):
837 def get_segment_for_revs(self, startrev, endrev):
837 """Obtain a segment of raw data corresponding to a range of revisions.
838 """Obtain a segment of raw data corresponding to a range of revisions.
838
839
839 Accepts the start and end revisions and an optional already-open
840 Accepts the start and end revisions and an optional already-open
840 file handle to be used for reading. If the file handle is read, its
841 file handle to be used for reading. If the file handle is read, its
841 seek position will not be preserved.
842 seek position will not be preserved.
842
843
843 Requests for data may be satisfied by a cache.
844 Requests for data may be satisfied by a cache.
844
845
845 Returns a 2-tuple of (offset, data) for the requested range of
846 Returns a 2-tuple of (offset, data) for the requested range of
846 revisions. Offset is the integer offset from the beginning of the
847 revisions. Offset is the integer offset from the beginning of the
847 revlog and data is a str or buffer of the raw byte data.
848 revlog and data is a str or buffer of the raw byte data.
848
849
849 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
850 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
850 to determine where each revision's data begins and ends.
851 to determine where each revision's data begins and ends.
851
852
852 API: we should consider making this a private part of the InnerRevlog
853 API: we should consider making this a private part of the InnerRevlog
853 at some point.
854 at some point.
854 """
855 """
855 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
856 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
856 # (functions are expensive).
857 # (functions are expensive).
857 index = self.index
858 index = self.index
858 istart = index[startrev]
859 istart = index[startrev]
859 start = int(istart[0] >> 16)
860 start = int(istart[0] >> 16)
860 if startrev == endrev:
861 if startrev == endrev:
861 end = start + istart[1]
862 end = start + istart[1]
862 else:
863 else:
863 iend = index[endrev]
864 iend = index[endrev]
864 end = int(iend[0] >> 16) + iend[1]
865 end = int(iend[0] >> 16) + iend[1]
865
866
866 if self.inline:
867 if self.inline:
867 start += (startrev + 1) * self.index.entry_size
868 start += (startrev + 1) * self.index.entry_size
868 end += (endrev + 1) * self.index.entry_size
869 end += (endrev + 1) * self.index.entry_size
869 length = end - start
870 length = end - start
870
871
871 return start, self._segmentfile.read_chunk(start, length)
872 return start, self._segmentfile.read_chunk(start, length)
872
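# Worked example for the inline adjustment above: assuming a 64-byte index
# entry size, revision 3 of an inline revlog is preceded by (3 + 1) * 64 =
# 256 bytes of interleaved index entries, so both `start` and `end` are
# shifted by the matching number of entries when addressing the combined
# index+data file.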
873
873 def _chunk(self, rev):
874 def _chunk(self, rev):
874 """Obtain a single decompressed chunk for a revision.
875 """Obtain a single decompressed chunk for a revision.
875
876
876 Accepts an integer revision and an optional already-open file handle
877 Accepts an integer revision and an optional already-open file handle
877 to be used for reading. If used, the seek position of the file will not
878 to be used for reading. If used, the seek position of the file will not
878 be preserved.
879 be preserved.
879
880
880 Returns a str holding uncompressed data for the requested revision.
881 Returns a str holding uncompressed data for the requested revision.
881 """
882 """
882 if self._uncompressed_chunk_cache is not None:
883 if self._uncompressed_chunk_cache is not None:
883 uncomp = self._uncompressed_chunk_cache.get(rev)
884 uncomp = self._uncompressed_chunk_cache.get(rev)
884 if uncomp is not None:
885 if uncomp is not None:
885 return uncomp
886 return uncomp
886
887
887 compression_mode = self.index[rev][10]
888 compression_mode = self.index[rev][10]
888 data = self.get_segment_for_revs(rev, rev)[1]
889 data = self.get_segment_for_revs(rev, rev)[1]
889 if compression_mode == COMP_MODE_PLAIN:
890 if compression_mode == COMP_MODE_PLAIN:
890 uncomp = data
891 uncomp = data
891 elif compression_mode == COMP_MODE_DEFAULT:
892 elif compression_mode == COMP_MODE_DEFAULT:
892 uncomp = self._decompressor(data)
893 uncomp = self._decompressor(data)
893 elif compression_mode == COMP_MODE_INLINE:
894 elif compression_mode == COMP_MODE_INLINE:
894 uncomp = self.decompress(data)
895 uncomp = self.decompress(data)
895 else:
896 else:
896 msg = b'unknown compression mode %d'
897 msg = b'unknown compression mode %d'
897 msg %= compression_mode
898 msg %= compression_mode
898 raise error.RevlogError(msg)
899 raise error.RevlogError(msg)
899 if self._uncompressed_chunk_cache is not None:
900 if self._uncompressed_chunk_cache is not None:
900 self._uncompressed_chunk_cache.insert(rev, uncomp, cost=len(uncomp))
901 self._uncompressed_chunk_cache.insert(rev, uncomp, cost=len(uncomp))
901 return uncomp
902 return uncomp
902
903
903 def _chunks(self, revs, targetsize=None):
904 def _chunks(self, revs, targetsize=None):
904 """Obtain decompressed chunks for the specified revisions.
905 """Obtain decompressed chunks for the specified revisions.
905
906
906 Accepts an iterable of numeric revisions that are assumed to be in
907 Accepts an iterable of numeric revisions that are assumed to be in
907 ascending order. Also accepts an optional already-open file handle
908 ascending order. Also accepts an optional already-open file handle
908 to be used for reading. If used, the seek position of the file will
909 to be used for reading. If used, the seek position of the file will
909 not be preserved.
910 not be preserved.
910
911
911 This function is similar to calling ``self._chunk()`` multiple times,
912 This function is similar to calling ``self._chunk()`` multiple times,
912 but is faster.
913 but is faster.
913
914
914 Returns a list with decompressed data for each requested revision.
915 Returns a list with decompressed data for each requested revision.
915 """
916 """
916 if not revs:
917 if not revs:
917 return []
918 return []
918 start = self.start
919 start = self.start
919 length = self.length
920 length = self.length
920 inline = self.inline
921 inline = self.inline
921 iosize = self.index.entry_size
922 iosize = self.index.entry_size
922 buffer = util.buffer
923 buffer = util.buffer
923
924
924 fetched_revs = []
925 fetched_revs = []
925 fadd = fetched_revs.append
926 fadd = fetched_revs.append
926
927
927 chunks = []
928 chunks = []
928 ladd = chunks.append
929 ladd = chunks.append
929
930
930 if self._uncompressed_chunk_cache is None:
931 if self._uncompressed_chunk_cache is None:
931 fetched_revs = revs
932 fetched_revs = revs
932 else:
933 else:
933 for rev in revs:
934 for rev in revs:
934 cached_value = self._uncompressed_chunk_cache.get(rev)
935 cached_value = self._uncompressed_chunk_cache.get(rev)
935 if cached_value is None:
936 if cached_value is None:
936 fadd(rev)
937 fadd(rev)
937 else:
938 else:
938 ladd((rev, cached_value))
939 ladd((rev, cached_value))
939
940
940 if not fetched_revs:
941 if not fetched_revs:
941 slicedchunks = ()
942 slicedchunks = ()
942 elif not self.data_config.with_sparse_read:
943 elif not self.data_config.with_sparse_read:
943 slicedchunks = (fetched_revs,)
944 slicedchunks = (fetched_revs,)
944 else:
945 else:
945 slicedchunks = deltautil.slicechunk(
946 slicedchunks = deltautil.slicechunk(
946 self,
947 self,
947 fetched_revs,
948 fetched_revs,
948 targetsize=targetsize,
949 targetsize=targetsize,
949 )
950 )
950
951
951 for revschunk in slicedchunks:
952 for revschunk in slicedchunks:
952 firstrev = revschunk[0]
953 firstrev = revschunk[0]
953 # Skip trailing revisions with empty diff
954 # Skip trailing revisions with empty diff
954 for lastrev in revschunk[::-1]:
955 for lastrev in revschunk[::-1]:
955 if length(lastrev) != 0:
956 if length(lastrev) != 0:
956 break
957 break
957
958
958 try:
959 try:
959 offset, data = self.get_segment_for_revs(firstrev, lastrev)
960 offset, data = self.get_segment_for_revs(firstrev, lastrev)
960 except OverflowError:
961 except OverflowError:
961 # issue4215 - we can't cache a run of chunks greater than
962 # issue4215 - we can't cache a run of chunks greater than
962 # 2G on Windows
963 # 2G on Windows
963 for rev in revschunk:
964 for rev in revschunk:
964 ladd((rev, self._chunk(rev)))
965 ladd((rev, self._chunk(rev)))
965
966
966 decomp = self.decompress
967 decomp = self.decompress
967 # self._decompressor might be None, but will not be used in that case
968 # self._decompressor might be None, but will not be used in that case
968 def_decomp = self._decompressor
969 def_decomp = self._decompressor
969 for rev in revschunk:
970 for rev in revschunk:
970 chunkstart = start(rev)
971 chunkstart = start(rev)
971 if inline:
972 if inline:
972 chunkstart += (rev + 1) * iosize
973 chunkstart += (rev + 1) * iosize
973 chunklength = length(rev)
974 chunklength = length(rev)
974 comp_mode = self.index[rev][10]
975 comp_mode = self.index[rev][10]
975 c = buffer(data, chunkstart - offset, chunklength)
976 c = buffer(data, chunkstart - offset, chunklength)
976 if comp_mode == COMP_MODE_PLAIN:
977 if comp_mode == COMP_MODE_PLAIN:
977 c = c
978 c = c
978 elif comp_mode == COMP_MODE_INLINE:
979 elif comp_mode == COMP_MODE_INLINE:
979 c = decomp(c)
980 c = decomp(c)
980 elif comp_mode == COMP_MODE_DEFAULT:
981 elif comp_mode == COMP_MODE_DEFAULT:
981 c = def_decomp(c)
982 c = def_decomp(c)
982 else:
983 else:
983 msg = b'unknown compression mode %d'
984 msg = b'unknown compression mode %d'
984 msg %= comp_mode
985 msg %= comp_mode
985 raise error.RevlogError(msg)
986 raise error.RevlogError(msg)
986 ladd((rev, c))
987 ladd((rev, c))
987 if self._uncompressed_chunk_cache is not None:
988 if self._uncompressed_chunk_cache is not None:
988 self._uncompressed_chunk_cache.insert(rev, c, len(c))
989 self._uncompressed_chunk_cache.insert(rev, c, len(c))
989
990
990 chunks.sort()
991 chunks.sort()
991 return [x[1] for x in chunks]
992 return [x[1] for x in chunks]
992
993
993 def raw_text(self, node, rev):
994 def raw_text(self, node, rev):
994 """return the possibly unvalidated rawtext for a revision
995 """return the possibly unvalidated rawtext for a revision
995
996
996 returns (rev, rawtext, validated)
997 returns (rev, rawtext, validated)
997 """
998 """
998
999
999 # revision in the cache (could be useful to apply delta)
1000 # revision in the cache (could be useful to apply delta)
1000 cachedrev = None
1001 cachedrev = None
1001 # An intermediate text to apply deltas to
1002 # An intermediate text to apply deltas to
1002 basetext = None
1003 basetext = None
1003
1004
1004 # Check if we have the entry in cache
1005 # Check if we have the entry in cache
1005 # The cache entry looks like (node, rev, rawtext)
1006 # The cache entry looks like (node, rev, rawtext)
1006 if self._revisioncache:
1007 if self._revisioncache:
1007 cachedrev = self._revisioncache[1]
1008 cachedrev = self._revisioncache[1]
1008
1009
1009 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1010 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1010 if stopped:
1011 if stopped:
1011 basetext = self._revisioncache[2]
1012 basetext = self._revisioncache[2]
1012
1013
1013 # drop cache to save memory, the caller is expected to
1014 # drop cache to save memory, the caller is expected to
1014 # update self._inner._revisioncache after validating the text
1015 # update self._inner._revisioncache after validating the text
1015 self._revisioncache = None
1016 self._revisioncache = None
1016
1017
1017 targetsize = None
1018 targetsize = None
1018 rawsize = self.index[rev][2]
1019 rawsize = self.index[rev][2]
1019 if 0 <= rawsize:
1020 if 0 <= rawsize:
1020 targetsize = 4 * rawsize
1021 targetsize = 4 * rawsize
1021
1022
1022 if self._uncompressed_chunk_cache is not None:
1023 if self._uncompressed_chunk_cache is not None:
1023 # dynamically update the uncompressed_chunk_cache size to the
1024 # dynamically update the uncompressed_chunk_cache size to the
1024 # largest revision we saw in this revlog.
1025 # largest revision we saw in this revlog.
1025 factor = self.data_config.uncompressed_cache_factor
1026 factor = self.data_config.uncompressed_cache_factor
1026 candidate_size = rawsize * factor
1027 candidate_size = rawsize * factor
1027 if candidate_size > self._uncompressed_chunk_cache.maxcost:
1028 if candidate_size > self._uncompressed_chunk_cache.maxcost:
1028 self._uncompressed_chunk_cache.maxcost = candidate_size
1029 self._uncompressed_chunk_cache.maxcost = candidate_size
1029
1030
1030 bins = self._chunks(chain, targetsize=targetsize)
1031 bins = self._chunks(chain, targetsize=targetsize)
1031 if basetext is None:
1032 if basetext is None:
1032 basetext = bytes(bins[0])
1033 basetext = bytes(bins[0])
1033 bins = bins[1:]
1034 bins = bins[1:]
1034
1035
1035 rawtext = mdiff.patches(basetext, bins)
1036 rawtext = mdiff.patches(basetext, bins)
1036 del basetext # let us have a chance to free memory early
1037 del basetext # let us have a chance to free memory early
1037 return (rev, rawtext, False)
1038 return (rev, rawtext, False)
1038
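# raw_text() above ties the pieces together: it resolves the delta chain for
# `rev`, reuses the cached revision as the base text when the chain walk
# stopped on it, fetches and decompresses the remaining chunks through
# _chunks(), and folds the deltas onto the base with mdiff.patches(). The
# third element of the returned tuple is False because the text has not been
# validated against the stored hash; that check is left to the caller.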
1039
1039 def sidedata(self, rev, sidedata_end):
1040 def sidedata(self, rev, sidedata_end):
1040 """Return the sidedata for a given revision number."""
1041 """Return the sidedata for a given revision number."""
1041 index_entry = self.index[rev]
1042 index_entry = self.index[rev]
1042 sidedata_offset = index_entry[8]
1043 sidedata_offset = index_entry[8]
1043 sidedata_size = index_entry[9]
1044 sidedata_size = index_entry[9]
1044
1045
1045 if self.inline:
1046 if self.inline:
1046 sidedata_offset += self.index.entry_size * (1 + rev)
1047 sidedata_offset += self.index.entry_size * (1 + rev)
1047 if sidedata_size == 0:
1048 if sidedata_size == 0:
1048 return {}
1049 return {}
1049
1050
1050 if sidedata_end < sidedata_offset + sidedata_size:
1051 if sidedata_end < sidedata_offset + sidedata_size:
1051 filename = self.sidedata_file
1052 filename = self.sidedata_file
1052 end = sidedata_end
1053 end = sidedata_end
1053 offset = sidedata_offset
1054 offset = sidedata_offset
1054 length = sidedata_size
1055 length = sidedata_size
1055 m = FILE_TOO_SHORT_MSG % (filename, length, offset, end)
1056 m = FILE_TOO_SHORT_MSG % (filename, length, offset, end)
1056 raise error.RevlogError(m)
1057 raise error.RevlogError(m)
1057
1058
1058 comp_segment = self._segmentfile_sidedata.read_chunk(
1059 comp_segment = self._segmentfile_sidedata.read_chunk(
1059 sidedata_offset, sidedata_size
1060 sidedata_offset, sidedata_size
1060 )
1061 )
1061
1062
1062 comp = self.index[rev][11]
1063 comp = self.index[rev][11]
1063 if comp == COMP_MODE_PLAIN:
1064 if comp == COMP_MODE_PLAIN:
1064 segment = comp_segment
1065 segment = comp_segment
1065 elif comp == COMP_MODE_DEFAULT:
1066 elif comp == COMP_MODE_DEFAULT:
1066 segment = self._decompressor(comp_segment)
1067 segment = self._decompressor(comp_segment)
1067 elif comp == COMP_MODE_INLINE:
1068 elif comp == COMP_MODE_INLINE:
1068 segment = self.decompress(comp_segment)
1069 segment = self.decompress(comp_segment)
1069 else:
1070 else:
1070 msg = b'unknown compression mode %d'
1071 msg = b'unknown compression mode %d'
1071 msg %= comp
1072 msg %= comp
1072 raise error.RevlogError(msg)
1073 raise error.RevlogError(msg)
1073
1074
1074 sidedata = sidedatautil.deserialize_sidedata(segment)
1075 sidedata = sidedatautil.deserialize_sidedata(segment)
1075 return sidedata
1076 return sidedata
1076
1077
1077 def write_entry(
1078 def write_entry(
1078 self,
1079 self,
1079 transaction,
1080 transaction,
1080 entry,
1081 entry,
1081 data,
1082 data,
1082 link,
1083 link,
1083 offset,
1084 offset,
1084 sidedata,
1085 sidedata,
1085 sidedata_offset,
1086 sidedata_offset,
1086 index_end,
1087 index_end,
1087 data_end,
1088 data_end,
1088 sidedata_end,
1089 sidedata_end,
1089 ):
1090 ):
1090 # Files opened in a+ mode have inconsistent behavior on various
1091 # Files opened in a+ mode have inconsistent behavior on various
1091 # platforms. Windows requires that a file positioning call be made
1092 # platforms. Windows requires that a file positioning call be made
1092 # when the file handle transitions between reads and writes. See
1093 # when the file handle transitions between reads and writes. See
1093 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
1094 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
1094 # platforms, Python or the platform itself can be buggy. Some versions
1095 # platforms, Python or the platform itself can be buggy. Some versions
1095 # of Solaris have been observed to not append at the end of the file
1096 # of Solaris have been observed to not append at the end of the file
1096 # if the file was seeked to before the end. See issue4943 for more.
1097 # if the file was seeked to before the end. See issue4943 for more.
1097 #
1098 #
1098 # We work around this issue by inserting a seek() before writing.
1099 # We work around this issue by inserting a seek() before writing.
1099 # Note: This is likely not necessary on Python 3. However, because
1100 # Note: This is likely not necessary on Python 3. However, because
1100 # the file handle is reused for reads and may be seeked there, we need
1101 # the file handle is reused for reads and may be seeked there, we need
1101 # to be careful before changing this.
1102 # to be careful before changing this.
1102 if self._writinghandles is None:
1103 if self._writinghandles is None:
1103 msg = b'adding revision outside `revlog._writing` context'
1104 msg = b'adding revision outside `revlog._writing` context'
1104 raise error.ProgrammingError(msg)
1105 raise error.ProgrammingError(msg)
1105 ifh, dfh, sdfh = self._writinghandles
1106 ifh, dfh, sdfh = self._writinghandles
1106 if index_end is None:
1107 if index_end is None:
1107 ifh.seek(0, os.SEEK_END)
1108 ifh.seek(0, os.SEEK_END)
1108 else:
1109 else:
1109 ifh.seek(index_end, os.SEEK_SET)
1110 ifh.seek(index_end, os.SEEK_SET)
1110 if dfh:
1111 if dfh:
1111 if data_end is None:
1112 if data_end is None:
1112 dfh.seek(0, os.SEEK_END)
1113 dfh.seek(0, os.SEEK_END)
1113 else:
1114 else:
1114 dfh.seek(data_end, os.SEEK_SET)
1115 dfh.seek(data_end, os.SEEK_SET)
1115 if sdfh:
1116 if sdfh:
1116 sdfh.seek(sidedata_end, os.SEEK_SET)
1117 sdfh.seek(sidedata_end, os.SEEK_SET)
1117
1118
1118 curr = len(self.index) - 1
1119 curr = len(self.index) - 1
1119 if not self.inline:
1120 if not self.inline:
1120 transaction.add(self.data_file, offset)
1121 transaction.add(self.data_file, offset)
1121 if self.sidedata_file:
1122 if self.sidedata_file:
1122 transaction.add(self.sidedata_file, sidedata_offset)
1123 transaction.add(self.sidedata_file, sidedata_offset)
1123 transaction.add(self.canonical_index_file, curr * len(entry))
1124 transaction.add(self.canonical_index_file, curr * len(entry))
1124 if data[0]:
1125 if data[0]:
1125 dfh.write(data[0])
1126 dfh.write(data[0])
1126 dfh.write(data[1])
1127 dfh.write(data[1])
1127 if sidedata:
1128 if sidedata:
1128 sdfh.write(sidedata)
1129 sdfh.write(sidedata)
1129 if self._delay_buffer is None:
1130 if self._delay_buffer is None:
1130 ifh.write(entry)
1131 ifh.write(entry)
1131 else:
1132 else:
1132 self._delay_buffer.append(entry)
1133 self._delay_buffer.append(entry)
1133 elif self._delay_buffer is not None:
1134 elif self._delay_buffer is not None:
1134 msg = b'invalid delayed write on inline revlog'
1135 msg = b'invalid delayed write on inline revlog'
1135 raise error.ProgrammingError(msg)
1136 raise error.ProgrammingError(msg)
1136 else:
1137 else:
1137 offset += curr * self.index.entry_size
1138 offset += curr * self.index.entry_size
1138 transaction.add(self.canonical_index_file, offset)
1139 transaction.add(self.canonical_index_file, offset)
1139 assert not sidedata
1140 assert not sidedata
1140 ifh.write(entry)
1141 ifh.write(entry)
1141 ifh.write(data[0])
1142 ifh.write(data[0])
1142 ifh.write(data[1])
1143 ifh.write(data[1])
1143 return (
1144 return (
1144 ifh.tell(),
1145 ifh.tell(),
1145 dfh.tell() if dfh else None,
1146 dfh.tell() if dfh else None,
1146 sdfh.tell() if sdfh else None,
1147 sdfh.tell() if sdfh else None,
1147 )
1148 )
1148
1149
1149 def _divert_index(self):
1150 def _divert_index(self):
1150 return self.index_file + b'.a'
1151 return self.index_file + b'.a'
1151
1152
1152 def delay(self):
1153 def delay(self):
1153 assert not self.is_open
1154 assert not self.is_open
1154 if self.inline:
1155 if self.inline:
1155 msg = "revlog with delayed write should not be inline"
1156 msg = "revlog with delayed write should not be inline"
1156 raise error.ProgrammingError(msg)
1157 raise error.ProgrammingError(msg)
1157 if self._delay_buffer is not None or self._orig_index_file is not None:
1158 if self._delay_buffer is not None or self._orig_index_file is not None:
1158 # delay or divert already in place
1159 # delay or divert already in place
1159 return None
1160 return None
1160 elif len(self.index) == 0:
1161 elif len(self.index) == 0:
1161 self._orig_index_file = self.index_file
1162 self._orig_index_file = self.index_file
1162 self.index_file = self._divert_index()
1163 self.index_file = self._divert_index()
1163 assert self._orig_index_file is not None
1164 assert self._orig_index_file is not None
1164 assert self.index_file is not None
1165 assert self.index_file is not None
1165 if self.opener.exists(self.index_file):
1166 if self.opener.exists(self.index_file):
1166 self.opener.unlink(self.index_file)
1167 self.opener.unlink(self.index_file)
1167 return self.index_file
1168 return self.index_file
1168 else:
1169 else:
1169 self._delay_buffer = []
1170 self._delay_buffer = []
1170 return None
1171 return None
1171
1172
1172 def write_pending(self):
1173 def write_pending(self):
1173 assert not self.is_open
1174 assert not self.is_open
1174 if self.inline:
1175 if self.inline:
1175 msg = "revlog with delayed write should not be inline"
1176 msg = "revlog with delayed write should not be inline"
1176 raise error.ProgrammingError(msg)
1177 raise error.ProgrammingError(msg)
1177 if self._orig_index_file is not None:
1178 if self._orig_index_file is not None:
1178 return None, True
1179 return None, True
1179 any_pending = False
1180 any_pending = False
1180 pending_index_file = self._divert_index()
1181 pending_index_file = self._divert_index()
1181 if self.opener.exists(pending_index_file):
1182 if self.opener.exists(pending_index_file):
1182 self.opener.unlink(pending_index_file)
1183 self.opener.unlink(pending_index_file)
1183 util.copyfile(
1184 util.copyfile(
1184 self.opener.join(self.index_file),
1185 self.opener.join(self.index_file),
1185 self.opener.join(pending_index_file),
1186 self.opener.join(pending_index_file),
1186 )
1187 )
1187 if self._delay_buffer:
1188 if self._delay_buffer:
1188 with self.opener(pending_index_file, b'r+') as ifh:
1189 with self.opener(pending_index_file, b'r+') as ifh:
1189 ifh.seek(0, os.SEEK_END)
1190 ifh.seek(0, os.SEEK_END)
1190 ifh.write(b"".join(self._delay_buffer))
1191 ifh.write(b"".join(self._delay_buffer))
1191 any_pending = True
1192 any_pending = True
1192 self._delay_buffer = None
1193 self._delay_buffer = None
1193 self._orig_index_file = self.index_file
1194 self._orig_index_file = self.index_file
1194 self.index_file = pending_index_file
1195 self.index_file = pending_index_file
1195 return self.index_file, any_pending
1196 return self.index_file, any_pending
1196
1197
1197 def finalize_pending(self):
1198 def finalize_pending(self):
1198 assert not self.is_open
1199 assert not self.is_open
1199 if self.inline:
1200 if self.inline:
1200 msg = "revlog with delayed write should not be inline"
1201 msg = "revlog with delayed write should not be inline"
1201 raise error.ProgrammingError(msg)
1202 raise error.ProgrammingError(msg)
1202
1203
1203 delay = self._delay_buffer is not None
1204 delay = self._delay_buffer is not None
1204 divert = self._orig_index_file is not None
1205 divert = self._orig_index_file is not None
1205
1206
1206 if delay and divert:
1207 if delay and divert:
1207 assert False, "unreachable"
1208 assert False, "unreachable"
1208 elif delay:
1209 elif delay:
1209 if self._delay_buffer:
1210 if self._delay_buffer:
1210 with self.opener(self.index_file, b'r+') as ifh:
1211 with self.opener(self.index_file, b'r+') as ifh:
1211 ifh.seek(0, os.SEEK_END)
1212 ifh.seek(0, os.SEEK_END)
1212 ifh.write(b"".join(self._delay_buffer))
1213 ifh.write(b"".join(self._delay_buffer))
1213 self._delay_buffer = None
1214 self._delay_buffer = None
1214 elif divert:
1215 elif divert:
1215 if self.opener.exists(self.index_file):
1216 if self.opener.exists(self.index_file):
1216 self.opener.rename(
1217 self.opener.rename(
1217 self.index_file,
1218 self.index_file,
1218 self._orig_index_file,
1219 self._orig_index_file,
1219 checkambig=True,
1220 checkambig=True,
1220 )
1221 )
1221 self.index_file = self._orig_index_file
1222 self.index_file = self._orig_index_file
1222 self._orig_index_file = None
1223 self._orig_index_file = None
1223 else:
1224 else:
1224 msg = b"no delay or divert found on this revlog"
1225 msg = b"no delay or divert found on this revlog"
1225 raise error.ProgrammingError(msg)
1226 raise error.ProgrammingError(msg)
1226 return self.canonical_index_file
1227 return self.canonical_index_file
1227
1228
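# --- editor's illustrative sketch (not part of the diff) ----------------------
# How the delay()/write_pending()/finalize_pending() trio above is meant to be
# sequenced. `inner` is assumed to be an _InnerRevlog for a non-inline
# changelog and `add_revisions` stands in for whatever appends revisions;
# in real life the transaction/changelog machinery drives this, not user code.

def delayed_write_demo(inner, add_revisions):
    inner.delay()                    # buffer (or divert, if the revlog is empty)
    add_revisions(inner)             # new index entries land in _delay_buffer
    pending_file, any_pending = inner.write_pending()
    if any_pending:
        pass                         # hooks may read the pending '.i.a' file here
    return inner.finalize_pending()  # flush/rename and restore the index file
# ------------------------------------------------------------------------------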
1228
1229
1229 class revlog:
1230 class revlog:
1230 """
1231 """
1231 the underlying revision storage object
1232 the underlying revision storage object
1232
1233
1233 A revlog consists of two parts, an index and the revision data.
1234 A revlog consists of two parts, an index and the revision data.
1234
1235
1235 The index is a file with a fixed record size containing
1236 The index is a file with a fixed record size containing
1236 information on each revision, including its nodeid (hash), the
1237 information on each revision, including its nodeid (hash), the
1237 nodeids of its parents, the position and offset of its data within
1238 nodeids of its parents, the position and offset of its data within
1238 the data file, and the revision it's based on. Finally, each entry
1239 the data file, and the revision it's based on. Finally, each entry
1239 contains a linkrev entry that can serve as a pointer to external
1240 contains a linkrev entry that can serve as a pointer to external
1240 data.
1241 data.
1241
1242
1242 The revision data itself is a linear collection of data chunks.
1243 The revision data itself is a linear collection of data chunks.
1243 Each chunk represents a revision and is usually represented as a
1244 Each chunk represents a revision and is usually represented as a
1244 delta against the previous chunk. To bound lookup time, runs of
1245 delta against the previous chunk. To bound lookup time, runs of
1245 deltas are limited to about 2 times the length of the original
1246 deltas are limited to about 2 times the length of the original
1246 version data. This makes retrieval of a version proportional to
1247 version data. This makes retrieval of a version proportional to
1247 its size, or O(1) relative to the number of revisions.
1248 its size, or O(1) relative to the number of revisions.
1248
1249
1249 Both pieces of the revlog are written to in an append-only
1250 Both pieces of the revlog are written to in an append-only
1250 fashion, which means we never need to rewrite a file to insert or
1251 fashion, which means we never need to rewrite a file to insert or
1251 remove data, and can use some simple techniques to avoid the need
1252 remove data, and can use some simple techniques to avoid the need
1252 for locking while reading.
1253 for locking while reading.
1253
1254
1254 If checkambig, indexfile is opened with checkambig=True at
1255 If checkambig, indexfile is opened with checkambig=True at
1255 writing, to avoid file stat ambiguity.
1256 writing, to avoid file stat ambiguity.
1256
1257
1257 If mmaplargeindex is True, and an mmapindexthreshold is set, the
1258 If mmaplargeindex is True, and an mmapindexthreshold is set, the
1258 index will be mmapped rather than read if it is larger than the
1259 index will be mmapped rather than read if it is larger than the
1259 configured threshold.
1260 configured threshold.
1260
1261
1261 If censorable is True, the revlog can have censored revisions.
1262 If censorable is True, the revlog can have censored revisions.
1262
1263
1263 If `upperboundcomp` is not None, this is the expected maximal gain from
1264 If `upperboundcomp` is not None, this is the expected maximal gain from
1264 compression for the data content.
1265 compression for the data content.
1265
1266
1266 `concurrencychecker` is an optional function that receives 3 arguments: a
1267 `concurrencychecker` is an optional function that receives 3 arguments: a
1267 file handle, a filename, and an expected position. It should check whether
1268 file handle, a filename, and an expected position. It should check whether
1268 the current position in the file handle is valid, and log/warn/fail (by
1269 the current position in the file handle is valid, and log/warn/fail (by
1269 raising).
1270 raising).
1270
1271
1271 See mercurial/revlogutils/constants.py for details about the content of an
1272 See mercurial/revlogutils/constants.py for details about the content of an
1272 index entry.
1273 index entry.
1273 """
1274 """
1274
1275
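# --- editor's illustrative sketch (not part of the diff) ----------------------
# One plausible shape for the `concurrencychecker` hook described in the
# docstring above: it receives a file handle, the file name, and the position
# the revlog expects that handle to be at, and raises when they disagree.
# The error type and message below are the editor's choice, not Mercurial's.

def position_checker(fh, filename, expected_pos):
    actual = fh.tell()
    if actual != expected_pos:
        raise RuntimeError(
            b'%s: expected offset %d but file handle is at %d'
            % (filename, expected_pos, actual)
        )

# wired in through the constructor: revlog(..., concurrencychecker=position_checker)
# ------------------------------------------------------------------------------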
1275 _flagserrorclass = error.RevlogError
1276 _flagserrorclass = error.RevlogError
1276
1277
1277 @staticmethod
1278 @staticmethod
1278 def is_inline_index(header_bytes):
1279 def is_inline_index(header_bytes):
1279 """Determine if a revlog is inline from the initial bytes of the index"""
1280 """Determine if a revlog is inline from the initial bytes of the index"""
1280 if len(header_bytes) == 0:
1281 if len(header_bytes) == 0:
1281 return True
1282 return True
1282
1283
1283 header = INDEX_HEADER.unpack(header_bytes)[0]
1284 header = INDEX_HEADER.unpack(header_bytes)[0]
1284
1285
1285 _format_flags = header & ~0xFFFF
1286 _format_flags = header & ~0xFFFF
1286 _format_version = header & 0xFFFF
1287 _format_version = header & 0xFFFF
1287
1288
1288 features = FEATURES_BY_VERSION[_format_version]
1289 features = FEATURES_BY_VERSION[_format_version]
1289 return features[b'inline'](_format_flags)
1290 return features[b'inline'](_format_flags)
1290
1291
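# --- editor's illustrative sketch (not part of the diff) ----------------------
# Feeding is_inline_index() by hand: read the first four bytes of an index
# file and let the helper decode the version/flag word. The store path below
# is hypothetical; an empty file is reported as inline, matching the code.

def index_is_inline(path=b'/tmp/repo/.hg/store/data/foo.txt.i'):
    with open(path, 'rb') as fh:
        header_bytes = fh.read(4)
    return revlog.is_inline_index(header_bytes)
# ------------------------------------------------------------------------------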
1291 def __init__(
1292 def __init__(
1292 self,
1293 self,
1293 opener,
1294 opener,
1294 target,
1295 target,
1295 radix,
1296 radix,
1296 postfix=None, # only exist for `tmpcensored` now
1297 postfix=None, # only exist for `tmpcensored` now
1297 checkambig=False,
1298 checkambig=False,
1298 mmaplargeindex=False,
1299 mmaplargeindex=False,
1299 censorable=False,
1300 censorable=False,
1300 upperboundcomp=None,
1301 upperboundcomp=None,
1301 persistentnodemap=False,
1302 persistentnodemap=False,
1302 concurrencychecker=None,
1303 concurrencychecker=None,
1303 trypending=False,
1304 trypending=False,
1304 try_split=False,
1305 try_split=False,
1305 canonical_parent_order=True,
1306 canonical_parent_order=True,
1306 data_config=None,
1307 data_config=None,
1307 delta_config=None,
1308 delta_config=None,
1308 feature_config=None,
1309 feature_config=None,
1309 may_inline=True, # may inline new revlog
1310 may_inline=True, # may inline new revlog
1310 ):
1311 ):
1311 """
1312 """
1312 create a revlog object
1313 create a revlog object
1313
1314
1314 opener is a function that abstracts the file opening operation
1315 opener is a function that abstracts the file opening operation
1315 and can be used to implement COW semantics or the like.
1316 and can be used to implement COW semantics or the like.
1316
1317
1317 `target`: a (KIND, ID) tuple that identifies the content stored in
1318 `target`: a (KIND, ID) tuple that identifies the content stored in
1318 this revlog. It helps the rest of the code understand what the revlog
1319 this revlog. It helps the rest of the code understand what the revlog
1319 is about without having to resort to heuristics and index filename
1320 is about without having to resort to heuristics and index filename
1320 analysis. Note that this must be reliably set by normal code, but
1321 analysis. Note that this must be reliably set by normal code, but
1321 test, debug, or performance measurement code might not set it to an
1322 test, debug, or performance measurement code might not set it to an
1322 accurate value.
1323 accurate value.
1323 """
1324 """
1324
1325
1325 self.radix = radix
1326 self.radix = radix
1326
1327
1327 self._docket_file = None
1328 self._docket_file = None
1328 self._indexfile = None
1329 self._indexfile = None
1329 self._datafile = None
1330 self._datafile = None
1330 self._sidedatafile = None
1331 self._sidedatafile = None
1331 self._nodemap_file = None
1332 self._nodemap_file = None
1332 self.postfix = postfix
1333 self.postfix = postfix
1333 self._trypending = trypending
1334 self._trypending = trypending
1334 self._try_split = try_split
1335 self._try_split = try_split
1335 self._may_inline = may_inline
1336 self._may_inline = may_inline
1336 self.opener = opener
1337 self.opener = opener
1337 if persistentnodemap:
1338 if persistentnodemap:
1338 self._nodemap_file = nodemaputil.get_nodemap_file(self)
1339 self._nodemap_file = nodemaputil.get_nodemap_file(self)
1339
1340
1340 assert target[0] in ALL_KINDS
1341 assert target[0] in ALL_KINDS
1341 assert len(target) == 2
1342 assert len(target) == 2
1342 self.target = target
1343 self.target = target
1343 if feature_config is not None:
1344 if feature_config is not None:
1344 self.feature_config = feature_config.copy()
1345 self.feature_config = feature_config.copy()
1345 elif b'feature-config' in self.opener.options:
1346 elif b'feature-config' in self.opener.options:
1346 self.feature_config = self.opener.options[b'feature-config'].copy()
1347 self.feature_config = self.opener.options[b'feature-config'].copy()
1347 else:
1348 else:
1348 self.feature_config = FeatureConfig()
1349 self.feature_config = FeatureConfig()
1349 self.feature_config.censorable = censorable
1350 self.feature_config.censorable = censorable
1350 self.feature_config.canonical_parent_order = canonical_parent_order
1351 self.feature_config.canonical_parent_order = canonical_parent_order
1351 if data_config is not None:
1352 if data_config is not None:
1352 self.data_config = data_config.copy()
1353 self.data_config = data_config.copy()
1353 elif b'data-config' in self.opener.options:
1354 elif b'data-config' in self.opener.options:
1354 self.data_config = self.opener.options[b'data-config'].copy()
1355 self.data_config = self.opener.options[b'data-config'].copy()
1355 else:
1356 else:
1356 self.data_config = DataConfig()
1357 self.data_config = DataConfig()
1357 self.data_config.check_ambig = checkambig
1358 self.data_config.check_ambig = checkambig
1358 self.data_config.mmap_large_index = mmaplargeindex
1359 self.data_config.mmap_large_index = mmaplargeindex
1359 if delta_config is not None:
1360 if delta_config is not None:
1360 self.delta_config = delta_config.copy()
1361 self.delta_config = delta_config.copy()
1361 elif b'delta-config' in self.opener.options:
1362 elif b'delta-config' in self.opener.options:
1362 self.delta_config = self.opener.options[b'delta-config'].copy()
1363 self.delta_config = self.opener.options[b'delta-config'].copy()
1363 else:
1364 else:
1364 self.delta_config = DeltaConfig()
1365 self.delta_config = DeltaConfig()
1365 self.delta_config.upper_bound_comp = upperboundcomp
1366 self.delta_config.upper_bound_comp = upperboundcomp
1366
1367
1367 # Maps rev to chain base rev.
1368 # Maps rev to chain base rev.
1368 self._chainbasecache = util.lrucachedict(100)
1369 self._chainbasecache = util.lrucachedict(100)
1369
1370
1370 self.index = None
1371 self.index = None
1371 self._docket = None
1372 self._docket = None
1372 self._nodemap_docket = None
1373 self._nodemap_docket = None
1373 # Mapping of partial identifiers to full nodes.
1374 # Mapping of partial identifiers to full nodes.
1374 self._pcache = {}
1375 self._pcache = {}
1375
1376
1376 # other optional features
1377 # other optional features
1377
1378
1378 # Make copy of flag processors so each revlog instance can support
1379 # Make copy of flag processors so each revlog instance can support
1379 # custom flags.
1380 # custom flags.
1380 self._flagprocessors = dict(flagutil.flagprocessors)
1381 self._flagprocessors = dict(flagutil.flagprocessors)
1381 # prevent nesting of addgroup
1382 # prevent nesting of addgroup
1382 self._adding_group = None
1383 self._adding_group = None
1383
1384
1384 chunk_cache = self._loadindex()
1385 chunk_cache = self._loadindex()
1385 self._load_inner(chunk_cache)
1386 self._load_inner(chunk_cache)
1386 self._concurrencychecker = concurrencychecker
1387 self._concurrencychecker = concurrencychecker
1387
1388
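# --- editor's illustrative sketch (not part of the diff) ----------------------
# Opening a filelog-style revlog through the constructor documented above.
# The store path and tracked file are made up, and normal code goes through
# localrepo/filelog rather than instantiating revlog directly.
from mercurial import vfs as vfsmod
from mercurial.revlogutils.constants import KIND_FILELOG

def open_file_revlog(store_path=b'/tmp/repo/.hg/store'):
    opener = vfsmod.vfs(store_path)
    opener.options = {}  # revlog reads optional *-config entries from here
    rl = revlog(
        opener,
        target=(KIND_FILELOG, b'data/foo.txt'),
        radix=b'data/foo.txt',
    )
    return len(rl), rl.tip()  # revision count and tip node
# ------------------------------------------------------------------------------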
1388 def _init_opts(self):
1389 def _init_opts(self):
1389 """process options (from above/config) to set up the associated default revlog mode
1390 """process options (from above/config) to set up the associated default revlog mode
1390
1391
1391 These values might be affected when actually reading on disk information.
1392 These values might be affected when actually reading on disk information.
1392
1393
1393 The relevant values are returned for use in _loadindex().
1394 The relevant values are returned for use in _loadindex().
1394
1395
1395 * newversionflags:
1396 * newversionflags:
1396 version header to use if we need to create a new revlog
1397 version header to use if we need to create a new revlog
1397
1398
1398 * mmapindexthreshold:
1399 * mmapindexthreshold:
1399 minimal index size at which to start using mmap
1400 minimal index size at which to start using mmap
1400
1401
1401 * force_nodemap:
1402 * force_nodemap:
1402 force the usage of a "development" version of the nodemap code
1403 force the usage of a "development" version of the nodemap code
1403 """
1404 """
1404 opts = self.opener.options
1405 opts = self.opener.options
1405
1406
1406 if b'changelogv2' in opts and self.revlog_kind == KIND_CHANGELOG:
1407 if b'changelogv2' in opts and self.revlog_kind == KIND_CHANGELOG:
1407 new_header = CHANGELOGV2
1408 new_header = CHANGELOGV2
1408 compute_rank = opts.get(b'changelogv2.compute-rank', True)
1409 compute_rank = opts.get(b'changelogv2.compute-rank', True)
1409 self.feature_config.compute_rank = compute_rank
1410 self.feature_config.compute_rank = compute_rank
1410 elif b'revlogv2' in opts:
1411 elif b'revlogv2' in opts:
1411 new_header = REVLOGV2
1412 new_header = REVLOGV2
1412 elif b'revlogv1' in opts:
1413 elif b'revlogv1' in opts:
1413 new_header = REVLOGV1
1414 new_header = REVLOGV1
1414 if self._may_inline:
1415 if self._may_inline:
1415 new_header |= FLAG_INLINE_DATA
1416 new_header |= FLAG_INLINE_DATA
1416 if b'generaldelta' in opts:
1417 if b'generaldelta' in opts:
1417 new_header |= FLAG_GENERALDELTA
1418 new_header |= FLAG_GENERALDELTA
1418 elif b'revlogv0' in self.opener.options:
1419 elif b'revlogv0' in self.opener.options:
1419 new_header = REVLOGV0
1420 new_header = REVLOGV0
1420 else:
1421 else:
1421 new_header = REVLOG_DEFAULT_VERSION
1422 new_header = REVLOG_DEFAULT_VERSION
1422
1423
1423 mmapindexthreshold = None
1424 mmapindexthreshold = None
1424 if self.data_config.mmap_large_index:
1425 if self.data_config.mmap_large_index:
1425 mmapindexthreshold = self.data_config.mmap_index_threshold
1426 mmapindexthreshold = self.data_config.mmap_index_threshold
1426 if self.feature_config.enable_ellipsis:
1427 if self.feature_config.enable_ellipsis:
1427 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
1428 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
1428
1429
1429 # revlog v0 doesn't have flag processors
1430 # revlog v0 doesn't have flag processors
1430 for flag, processor in opts.get(b'flagprocessors', {}).items():
1431 for flag, processor in opts.get(b'flagprocessors', {}).items():
1431 flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
1432 flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
1432
1433
1433 chunk_cache_size = self.data_config.chunk_cache_size
1434 chunk_cache_size = self.data_config.chunk_cache_size
1434 if chunk_cache_size <= 0:
1435 if chunk_cache_size <= 0:
1435 raise error.RevlogError(
1436 raise error.RevlogError(
1436 _(b'revlog chunk cache size %r is not greater than 0')
1437 _(b'revlog chunk cache size %r is not greater than 0')
1437 % chunk_cache_size
1438 % chunk_cache_size
1438 )
1439 )
1439 elif chunk_cache_size & (chunk_cache_size - 1):
1440 elif chunk_cache_size & (chunk_cache_size - 1):
1440 raise error.RevlogError(
1441 raise error.RevlogError(
1441 _(b'revlog chunk cache size %r is not a power of 2')
1442 _(b'revlog chunk cache size %r is not a power of 2')
1442 % chunk_cache_size
1443 % chunk_cache_size
1443 )
1444 )
1444 force_nodemap = opts.get(b'devel-force-nodemap', False)
1445 force_nodemap = opts.get(b'devel-force-nodemap', False)
1445 return new_header, mmapindexthreshold, force_nodemap
1446 return new_header, mmapindexthreshold, force_nodemap
1446
1447
1447 def _get_data(self, filepath, mmap_threshold, size=None):
1448 def _get_data(self, filepath, mmap_threshold, size=None):
1448 """return a file content with or without mmap
1449 """return a file content with or without mmap
1449
1450
1450 If the file is missing return the empty string"""
1451 If the file is missing return the empty string"""
1451 try:
1452 try:
1452 with self.opener(filepath) as fp:
1453 with self.opener(filepath) as fp:
1453 if mmap_threshold is not None:
1454 if mmap_threshold is not None:
1454 file_size = self.opener.fstat(fp).st_size
1455 file_size = self.opener.fstat(fp).st_size
1455 if file_size >= mmap_threshold:
1456 if file_size >= mmap_threshold:
1456 if size is not None:
1457 if size is not None:
1457 # avoid potential mmap crash
1458 # avoid potential mmap crash
1458 size = min(file_size, size)
1459 size = min(file_size, size)
1459 # TODO: should .close() to release resources without
1460 # TODO: should .close() to release resources without
1460 # relying on Python GC
1461 # relying on Python GC
1461 if size is None:
1462 if size is None:
1462 return util.buffer(util.mmapread(fp))
1463 return util.buffer(util.mmapread(fp))
1463 else:
1464 else:
1464 return util.buffer(util.mmapread(fp, size))
1465 return util.buffer(util.mmapread(fp, size))
1465 if size is None:
1466 if size is None:
1466 return fp.read()
1467 return fp.read()
1467 else:
1468 else:
1468 return fp.read(size)
1469 return fp.read(size)
1469 except FileNotFoundError:
1470 except FileNotFoundError:
1470 return b''
1471 return b''
1471
1472
1472 def get_streams(self, max_linkrev, force_inline=False):
1473 def get_streams(self, max_linkrev, force_inline=False):
1473 """return a list of streams that represent this revlog
1474 """return a list of streams that represent this revlog
1474
1475
1475 This is used by stream-clone to do bytes to bytes copies of a repository.
1476 This is used by stream-clone to do bytes to bytes copies of a repository.
1476
1477
1477 This streams data for all revisions that refer to a changelog revision up
1478 This streams data for all revisions that refer to a changelog revision up
1478 to `max_linkrev`.
1479 to `max_linkrev`.
1479
1480
1480 If `force_inline` is set, it enforces that the stream will represent an inline revlog.
1481 If `force_inline` is set, it enforces that the stream will represent an inline revlog.
1481
1482
1482 It returns a list of three-tuples:
1483 It returns a list of three-tuples:
1483
1484
1484 [
1485 [
1485 (filename, bytes_stream, stream_size),
1486 (filename, bytes_stream, stream_size),
1486 …
1487 …
1487 ]
1488 ]
1488 """
1489 """
1489 n = len(self)
1490 n = len(self)
1490 index = self.index
1491 index = self.index
1491 while n > 0:
1492 while n > 0:
1492 linkrev = index[n - 1][4]
1493 linkrev = index[n - 1][4]
1493 if linkrev < max_linkrev:
1494 if linkrev < max_linkrev:
1494 break
1495 break
1495 # note: this loop will rarely go through multiple iterations, since
1496 # note: this loop will rarely go through multiple iterations, since
1496 # it only traverses commits created during the current streaming
1497 # it only traverses commits created during the current streaming
1497 # pull operation.
1498 # pull operation.
1498 #
1499 #
1499 # If this becomes a problem, using a binary search should cap the
1500 # If this becomes a problem, using a binary search should cap the
1500 # runtime of this.
1501 # runtime of this.
1501 n = n - 1
1502 n = n - 1
1502 if n == 0:
1503 if n == 0:
1503 # no data to send
1504 # no data to send
1504 return []
1505 return []
1505 index_size = n * index.entry_size
1506 index_size = n * index.entry_size
1506 data_size = self.end(n - 1)
1507 data_size = self.end(n - 1)
1507
1508
1508 # XXX we might have been split (or stripped) since the object
1509 # XXX we might have been split (or stripped) since the object
1509 # initialization. We need to close this race too, by having a way to
1510 # initialization. We need to close this race too, by having a way to
1510 # pre-open the files we feed to the revlog and never closing them before
1511 # pre-open the files we feed to the revlog and never closing them before
1511 # we are done streaming.
1512 # we are done streaming.
1512
1513
1513 if self._inline:
1514 if self._inline:
1514
1515
1515 def get_stream():
1516 def get_stream():
1516 with self.opener(self._indexfile, mode=b"r") as fp:
1517 with self.opener(self._indexfile, mode=b"r") as fp:
1517 yield None
1518 yield None
1518 size = index_size + data_size
1519 size = index_size + data_size
1519 if size <= 65536:
1520 if size <= 65536:
1520 yield fp.read(size)
1521 yield fp.read(size)
1521 else:
1522 else:
1522 yield from util.filechunkiter(fp, limit=size)
1523 yield from util.filechunkiter(fp, limit=size)
1523
1524
1524 inline_stream = get_stream()
1525 inline_stream = get_stream()
1525 next(inline_stream)
1526 next(inline_stream)
1526 return [
1527 return [
1527 (self._indexfile, inline_stream, index_size + data_size),
1528 (self._indexfile, inline_stream, index_size + data_size),
1528 ]
1529 ]
1529 elif force_inline:
1530 elif force_inline:
1530
1531
1531 def get_stream():
1532 def get_stream():
1532 with self.reading():
1533 with self.reading():
1533 yield None
1534 yield None
1534
1535
1535 for rev in range(n):
1536 for rev in range(n):
1536 idx = self.index.entry_binary(rev)
1537 idx = self.index.entry_binary(rev)
1537 if rev == 0 and self._docket is None:
1538 if rev == 0 and self._docket is None:
1538 # re-inject the inline flag
1539 # re-inject the inline flag
1539 header = self._format_flags
1540 header = self._format_flags
1540 header |= self._format_version
1541 header |= self._format_version
1541 header |= FLAG_INLINE_DATA
1542 header |= FLAG_INLINE_DATA
1542 header = self.index.pack_header(header)
1543 header = self.index.pack_header(header)
1543 idx = header + idx
1544 idx = header + idx
1544 yield idx
1545 yield idx
1545 yield self._inner.get_segment_for_revs(rev, rev)[1]
1546 yield self._inner.get_segment_for_revs(rev, rev)[1]
1546
1547
1547 inline_stream = get_stream()
1548 inline_stream = get_stream()
1548 next(inline_stream)
1549 next(inline_stream)
1549 return [
1550 return [
1550 (self._indexfile, inline_stream, index_size + data_size),
1551 (self._indexfile, inline_stream, index_size + data_size),
1551 ]
1552 ]
1552 else:
1553 else:
1553
1554
1554 def get_index_stream():
1555 def get_index_stream():
1555 with self.opener(self._indexfile, mode=b"r") as fp:
1556 with self.opener(self._indexfile, mode=b"r") as fp:
1556 yield None
1557 yield None
1557 if index_size <= 65536:
1558 if index_size <= 65536:
1558 yield fp.read(index_size)
1559 yield fp.read(index_size)
1559 else:
1560 else:
1560 yield from util.filechunkiter(fp, limit=index_size)
1561 yield from util.filechunkiter(fp, limit=index_size)
1561
1562
1562 def get_data_stream():
1563 def get_data_stream():
1563 with self._datafp() as fp:
1564 with self._datafp() as fp:
1564 yield None
1565 yield None
1565 if data_size <= 65536:
1566 if data_size <= 65536:
1566 yield fp.read(data_size)
1567 yield fp.read(data_size)
1567 else:
1568 else:
1568 yield from util.filechunkiter(fp, limit=data_size)
1569 yield from util.filechunkiter(fp, limit=data_size)
1569
1570
1570 index_stream = get_index_stream()
1571 index_stream = get_index_stream()
1571 next(index_stream)
1572 next(index_stream)
1572 data_stream = get_data_stream()
1573 data_stream = get_data_stream()
1573 next(data_stream)
1574 next(data_stream)
1574 return [
1575 return [
1575 (self._datafile, data_stream, data_size),
1576 (self._datafile, data_stream, data_size),
1576 (self._indexfile, index_stream, index_size),
1577 (self._indexfile, index_stream, index_size),
1577 ]
1578 ]
1578
1579
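# --- editor's illustrative sketch (not part of the diff) ----------------------
# Consuming the (filename, bytes_stream, size) triples documented for
# get_streams() above, e.g. to copy a revlog's bytes somewhere else.
# `destination_vfs` is an assumed writable vfs; max_linkrev caps which
# revisions are included, exactly as described in the docstring.

def copy_revlog_streams(rl, destination_vfs, max_linkrev):
    copied = 0
    for name, stream, size in rl.get_streams(max_linkrev):
        with destination_vfs(name, b'wb') as out:
            for chunk in stream:
                out.write(chunk)
        copied += size
    return copied
# ------------------------------------------------------------------------------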
1579 def _loadindex(self, docket=None):
1580 def _loadindex(self, docket=None):
1580
1581 new_header, mmapindexthreshold, force_nodemap = self._init_opts()
1581 new_header, mmapindexthreshold, force_nodemap = self._init_opts()
1582
1582
1583 if self.postfix is not None:
1583 if self.postfix is not None:
1584 entry_point = b'%s.i.%s' % (self.radix, self.postfix)
1584 entry_point = b'%s.i.%s' % (self.radix, self.postfix)
1585 elif self._trypending and self.opener.exists(b'%s.i.a' % self.radix):
1585 elif self._trypending and self.opener.exists(b'%s.i.a' % self.radix):
1586 entry_point = b'%s.i.a' % self.radix
1586 entry_point = b'%s.i.a' % self.radix
1587 elif self._try_split and self.opener.exists(self._split_index_file):
1587 elif self._try_split and self.opener.exists(self._split_index_file):
1588 entry_point = self._split_index_file
1588 entry_point = self._split_index_file
1589 else:
1589 else:
1590 entry_point = b'%s.i' % self.radix
1590 entry_point = b'%s.i' % self.radix
1591
1591
1592 if docket is not None:
1592 if docket is not None:
1593 self._docket = docket
1593 self._docket = docket
1594 self._docket_file = entry_point
1594 self._docket_file = entry_point
1595 else:
1595 else:
1596 self._initempty = True
1596 self._initempty = True
1597 entry_data = self._get_data(entry_point, mmapindexthreshold)
1597 entry_data = self._get_data(entry_point, mmapindexthreshold)
1598 if len(entry_data) > 0:
1598 if len(entry_data) > 0:
1599 header = INDEX_HEADER.unpack(entry_data[:4])[0]
1599 header = INDEX_HEADER.unpack(entry_data[:4])[0]
1600 self._initempty = False
1600 self._initempty = False
1601 else:
1601 else:
1602 header = new_header
1602 header = new_header
1603
1603
1604 self._format_flags = header & ~0xFFFF
1604 self._format_flags = header & ~0xFFFF
1605 self._format_version = header & 0xFFFF
1605 self._format_version = header & 0xFFFF
1606
1606
1607 supported_flags = SUPPORTED_FLAGS.get(self._format_version)
1607 supported_flags = SUPPORTED_FLAGS.get(self._format_version)
1608 if supported_flags is None:
1608 if supported_flags is None:
1609 msg = _(b'unknown version (%d) in revlog %s')
1609 msg = _(b'unknown version (%d) in revlog %s')
1610 msg %= (self._format_version, self.display_id)
1610 msg %= (self._format_version, self.display_id)
1611 raise error.RevlogError(msg)
1611 raise error.RevlogError(msg)
1612 elif self._format_flags & ~supported_flags:
1612 elif self._format_flags & ~supported_flags:
1613 msg = _(b'unknown flags (%#04x) in version %d revlog %s')
1613 msg = _(b'unknown flags (%#04x) in version %d revlog %s')
1614 display_flag = self._format_flags >> 16
1614 display_flag = self._format_flags >> 16
1615 msg %= (display_flag, self._format_version, self.display_id)
1615 msg %= (display_flag, self._format_version, self.display_id)
1616 raise error.RevlogError(msg)
1616 raise error.RevlogError(msg)
1617
1617
1618 features = FEATURES_BY_VERSION[self._format_version]
1618 features = FEATURES_BY_VERSION[self._format_version]
1619 self._inline = features[b'inline'](self._format_flags)
1619 self._inline = features[b'inline'](self._format_flags)
1620 self.delta_config.general_delta = features[b'generaldelta'](
1620 self.delta_config.general_delta = features[b'generaldelta'](
1621 self._format_flags
1621 self._format_flags
1622 )
1622 )
1623 self.feature_config.has_side_data = features[b'sidedata']
1623 self.feature_config.has_side_data = features[b'sidedata']
1624
1624
1625 if not features[b'docket']:
1625 if not features[b'docket']:
1626 self._indexfile = entry_point
1626 self._indexfile = entry_point
1627 index_data = entry_data
1627 index_data = entry_data
1628 else:
1628 else:
1629 self._docket_file = entry_point
1629 self._docket_file = entry_point
1630 if self._initempty:
1630 if self._initempty:
1631 self._docket = docketutil.default_docket(self, header)
1631 self._docket = docketutil.default_docket(self, header)
1632 else:
1632 else:
1633 self._docket = docketutil.parse_docket(
1633 self._docket = docketutil.parse_docket(
1634 self, entry_data, use_pending=self._trypending
1634 self, entry_data, use_pending=self._trypending
1635 )
1635 )
1636
1636
1637 if self._docket is not None:
1637 if self._docket is not None:
1638 self._indexfile = self._docket.index_filepath()
1638 self._indexfile = self._docket.index_filepath()
1639 index_data = b''
1639 index_data = b''
1640 index_size = self._docket.index_end
1640 index_size = self._docket.index_end
1641 if index_size > 0:
1641 if index_size > 0:
1642 index_data = self._get_data(
1642 index_data = self._get_data(
1643 self._indexfile, mmapindexthreshold, size=index_size
1643 self._indexfile, mmapindexthreshold, size=index_size
1644 )
1644 )
1645 if len(index_data) < index_size:
1645 if len(index_data) < index_size:
1646 msg = _(b'too few index data for %s: got %d, expected %d')
1646 msg = _(b'too few index data for %s: got %d, expected %d')
1647 msg %= (self.display_id, len(index_data), index_size)
1647 msg %= (self.display_id, len(index_data), index_size)
1648 raise error.RevlogError(msg)
1648 raise error.RevlogError(msg)
1649
1649
1650 self._inline = False
1650 self._inline = False
1651 # generaldelta implied by version 2 revlogs.
1651 # generaldelta implied by version 2 revlogs.
1652 self.delta_config.general_delta = True
1652 self.delta_config.general_delta = True
1653 # the logic for persistent nodemap will be dealt with within the
1653 # the logic for persistent nodemap will be dealt with within the
1654 # main docket, so disable it for now.
1654 # main docket, so disable it for now.
1655 self._nodemap_file = None
1655 self._nodemap_file = None
1656
1656
1657 if self._docket is not None:
1657 if self._docket is not None:
1658 self._datafile = self._docket.data_filepath()
1658 self._datafile = self._docket.data_filepath()
1659 self._sidedatafile = self._docket.sidedata_filepath()
1659 self._sidedatafile = self._docket.sidedata_filepath()
1660 elif self.postfix is None:
1660 elif self.postfix is None:
1661 self._datafile = b'%s.d' % self.radix
1661 self._datafile = b'%s.d' % self.radix
1662 else:
1662 else:
1663 self._datafile = b'%s.d.%s' % (self.radix, self.postfix)
1663 self._datafile = b'%s.d.%s' % (self.radix, self.postfix)
1664
1664
1665 self.nodeconstants = sha1nodeconstants
1665 self.nodeconstants = sha1nodeconstants
1666 self.nullid = self.nodeconstants.nullid
1666 self.nullid = self.nodeconstants.nullid
1667
1667
1668 # sparse-revlog can't be on without general-delta (issue6056)
1668 # sparse-revlog can't be on without general-delta (issue6056)
1669 if not self.delta_config.general_delta:
1669 if not self.delta_config.general_delta:
1670 self.delta_config.sparse_revlog = False
1670 self.delta_config.sparse_revlog = False
1671
1671
1672 self._storedeltachains = True
1672 self._storedeltachains = True
1673
1673
1674 devel_nodemap = (
1674 devel_nodemap = (
1675 self._nodemap_file
1675 self._nodemap_file
1676 and force_nodemap
1676 and force_nodemap
1677 and parse_index_v1_nodemap is not None
1677 and parse_index_v1_nodemap is not None
1678 )
1678 )
1679
1679
1680 use_rust_index = False
1680 use_rust_index = False
1681 if rustrevlog is not None:
1681 if rustrevlog is not None:
1682 if self._nodemap_file is not None:
1682 if self._nodemap_file is not None:
1683 use_rust_index = True
1683 use_rust_index = True
1684 else:
1684 else:
1685 # Using the CIndex is not longer possible, as the
1685 # Using the CIndex is no longer possible, as the
1686 # Using the CIndex is no longer possible, as the
1686 # `AncestorsIterator` and `LazyAncestors` classes now require
1687 # a Rust index for instantiation.
1687 # a Rust index for instantiation.
1688 use_rust_index = True
1688 use_rust_index = True
1689
1689
1690 self._parse_index = parse_index_v1
1690 self._parse_index = parse_index_v1
1691 if self._format_version == REVLOGV0:
1691 if self._format_version == REVLOGV0:
1692 self._parse_index = revlogv0.parse_index_v0
1692 self._parse_index = revlogv0.parse_index_v0
1693 elif self._format_version == REVLOGV2:
1693 elif self._format_version == REVLOGV2:
1694 self._parse_index = parse_index_v2
1694 self._parse_index = parse_index_v2
1695 elif self._format_version == CHANGELOGV2:
1695 elif self._format_version == CHANGELOGV2:
1696 self._parse_index = parse_index_cl_v2
1696 self._parse_index = parse_index_cl_v2
1697 elif devel_nodemap:
1697 elif devel_nodemap:
1698 self._parse_index = parse_index_v1_nodemap
1698 self._parse_index = parse_index_v1_nodemap
1699 elif use_rust_index:
1699 elif use_rust_index:
1700 self._parse_index = functools.partial(
1700 self._parse_index = functools.partial(
1701 parse_index_v1_rust, default_header=new_header
1701 parse_index_v1_rust, default_header=new_header
1702 )
1702 )
1703 try:
1703 try:
1704 d = self._parse_index(index_data, self._inline)
1704 d = self._parse_index(index_data, self._inline)
1705 index, chunkcache = d
1705 index, chunkcache = d
1706 use_nodemap = (
1706 use_nodemap = (
1707 not self._inline
1707 not self._inline
1708 and self._nodemap_file is not None
1708 and self._nodemap_file is not None
1709 and hasattr(index, 'update_nodemap_data')
1709 and hasattr(index, 'update_nodemap_data')
1710 )
1710 )
1711 if use_nodemap:
1711 if use_nodemap:
1712 nodemap_data = nodemaputil.persisted_data(self)
1712 nodemap_data = nodemaputil.persisted_data(self)
1713 if nodemap_data is not None:
1713 if nodemap_data is not None:
1714 docket = nodemap_data[0]
1714 docket = nodemap_data[0]
1715 if (
1715 if (
1716 len(d[0]) > docket.tip_rev
1716 len(d[0]) > docket.tip_rev
1717 and d[0][docket.tip_rev][7] == docket.tip_node
1717 and d[0][docket.tip_rev][7] == docket.tip_node
1718 ):
1718 ):
1719 # no changelog tampering
1719 # no changelog tampering
1720 self._nodemap_docket = docket
1720 self._nodemap_docket = docket
1721 index.update_nodemap_data(*nodemap_data)
1721 index.update_nodemap_data(*nodemap_data)
1722 except (ValueError, IndexError):
1722 except (ValueError, IndexError):
1723 raise error.RevlogError(
1723 raise error.RevlogError(
1724 _(b"index %s is corrupted") % self.display_id
1724 _(b"index %s is corrupted") % self.display_id
1725 )
1725 )
1726 self.index = index
1726 self.index = index
1727 # revnum -> (chain-length, sum-delta-length)
1727 # revnum -> (chain-length, sum-delta-length)
1728 self._chaininfocache = util.lrucachedict(500)
1728 self._chaininfocache = util.lrucachedict(500)
1729
1729
1730 return chunkcache
1730 return chunkcache
1731
1731
1732 def _load_inner(self, chunk_cache):
1732 def _load_inner(self, chunk_cache):
1733 if self._docket is None:
1733 if self._docket is None:
1734 default_compression_header = None
1734 default_compression_header = None
1735 else:
1735 else:
1736 default_compression_header = self._docket.default_compression_header
1736 default_compression_header = self._docket.default_compression_header
1737
1737
1738 self._inner = _InnerRevlog(
1738 self._inner = _InnerRevlog(
1739 opener=self.opener,
1739 opener=self.opener,
1740 index=self.index,
1740 index=self.index,
1741 index_file=self._indexfile,
1741 index_file=self._indexfile,
1742 data_file=self._datafile,
1742 data_file=self._datafile,
1743 sidedata_file=self._sidedatafile,
1743 sidedata_file=self._sidedatafile,
1744 inline=self._inline,
1744 inline=self._inline,
1745 data_config=self.data_config,
1745 data_config=self.data_config,
1746 delta_config=self.delta_config,
1746 delta_config=self.delta_config,
1747 feature_config=self.feature_config,
1747 feature_config=self.feature_config,
1748 chunk_cache=chunk_cache,
1748 chunk_cache=chunk_cache,
1749 default_compression_header=default_compression_header,
1749 default_compression_header=default_compression_header,
1750 )
1750 )
1751
1751
1752 def get_revlog(self):
1752 def get_revlog(self):
1753 """simple function to mirror API of other not-really-revlog API"""
1753 """simple function to mirror API of other not-really-revlog API"""
1754 return self
1754 return self
1755
1755
1756 @util.propertycache
1756 @util.propertycache
1757 def revlog_kind(self):
1757 def revlog_kind(self):
1758 return self.target[0]
1758 return self.target[0]
1759
1759
1760 @util.propertycache
1760 @util.propertycache
1761 def display_id(self):
1761 def display_id(self):
1762 """The public facing "ID" of the revlog that we use in message"""
1762 """The public facing "ID" of the revlog that we use in message"""
1763 if self.revlog_kind == KIND_FILELOG:
1763 if self.revlog_kind == KIND_FILELOG:
1764 # Reference the file without the "data/" prefix, so it is familiar
1764 # Reference the file without the "data/" prefix, so it is familiar
1765 # to the user.
1765 # to the user.
1766 return self.target[1]
1766 return self.target[1]
1767 else:
1767 else:
1768 return self.radix
1768 return self.radix
1769
1769
1770 def _datafp(self, mode=b'r'):
1770 def _datafp(self, mode=b'r'):
1771 """file object for the revlog's data file"""
1771 """file object for the revlog's data file"""
1772 return self.opener(self._datafile, mode=mode)
1772 return self.opener(self._datafile, mode=mode)
1773
1773
1774 def tiprev(self):
1774 def tiprev(self):
1775 return len(self.index) - 1
1775 return len(self.index) - 1
1776
1776
1777 def tip(self):
1777 def tip(self):
1778 return self.node(self.tiprev())
1778 return self.node(self.tiprev())
1779
1779
1780 def __contains__(self, rev):
1780 def __contains__(self, rev):
1781 return 0 <= rev < len(self)
1781 return 0 <= rev < len(self)
1782
1782
1783 def __len__(self):
1783 def __len__(self):
1784 return len(self.index)
1784 return len(self.index)
1785
1785
1786 def __iter__(self):
1786 def __iter__(self):
1787 return iter(range(len(self)))
1787 return iter(range(len(self)))
1788
1788
1789 def revs(self, start=0, stop=None):
1789 def revs(self, start=0, stop=None):
1790 """iterate over all rev in this revlog (from start to stop)"""
1790 """iterate over all rev in this revlog (from start to stop)"""
1791 return storageutil.iterrevs(len(self), start=start, stop=stop)
1791 return storageutil.iterrevs(len(self), start=start, stop=stop)
1792
1792
1793 def hasnode(self, node):
1793 def hasnode(self, node):
1794 try:
1794 try:
1795 self.rev(node)
1795 self.rev(node)
1796 return True
1796 return True
1797 except KeyError:
1797 except KeyError:
1798 return False
1798 return False
1799
1799
1800 def _candelta(self, baserev, rev):
1800 def _candelta(self, baserev, rev):
1801 """whether two revisions (baserev, rev) can be delta-ed or not"""
1801 """whether two revisions (baserev, rev) can be delta-ed or not"""
1802 # Disable delta if either rev requires a content-changing flag
1802 # Disable delta if either rev requires a content-changing flag
1803 # processor (ex. LFS). This is because such flag processor can alter
1803 # processor (ex. LFS). This is because such a flag processor can alter
1804 # processor (ex. LFS). This is because such a flag processor can alter
1804 # the rawtext content that the delta will be based on, and two clients
1805 # could have a same revlog node with different flags (i.e. different
1805 # could have the same revlog node with different flags (i.e. different
1806 # could have the same revlog node with different flags (i.e. different
1806 # rawtext contents) and the delta could be incompatible.
1807 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
1807 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
1808 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
1808 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
1809 ):
1809 ):
1810 return False
1810 return False
1811 return True
1811 return True
1812
1812
1813 def update_caches(self, transaction):
1813 def update_caches(self, transaction):
1814 """update on disk cache
1814 """update the on-disk caches
1815 """update the on-disk caches
1815
1816 If a transaction is passed, the update may be delayed to transaction
1816 If a transaction is passed, the update may be delayed to transaction
1817 commit."""
1817 commit."""
1818 if self._nodemap_file is not None:
1818 if self._nodemap_file is not None:
1819 if transaction is None:
1819 if transaction is None:
1820 nodemaputil.update_persistent_nodemap(self)
1820 nodemaputil.update_persistent_nodemap(self)
1821 else:
1821 else:
1822 nodemaputil.setup_persistent_nodemap(transaction, self)
1822 nodemaputil.setup_persistent_nodemap(transaction, self)
1823
1823
1824 def clearcaches(self):
1824 def clearcaches(self):
1825 """Clear in-memory caches"""
1825 """Clear in-memory caches"""
1826 self._chainbasecache.clear()
1826 self._chainbasecache.clear()
1827 self._inner.clear_cache()
1827 self._inner.clear_cache()
1828 self._pcache = {}
1828 self._pcache = {}
1829 self._nodemap_docket = None
1829 self._nodemap_docket = None
1830 self.index.clearcaches()
1830 self.index.clearcaches()
1831 # The python code is the one responsible for validating the docket, we
1831 # The python code is the one responsible for validating the docket, we
1832 # end up having to refresh it here.
1832 # end up having to refresh it here.
1833 use_nodemap = (
1833 use_nodemap = (
1834 not self._inline
1834 not self._inline
1835 and self._nodemap_file is not None
1835 and self._nodemap_file is not None
1836 and hasattr(self.index, 'update_nodemap_data')
1836 and hasattr(self.index, 'update_nodemap_data')
1837 )
1837 )
1838 if use_nodemap:
1838 if use_nodemap:
1839 nodemap_data = nodemaputil.persisted_data(self)
1839 nodemap_data = nodemaputil.persisted_data(self)
1840 if nodemap_data is not None:
1840 if nodemap_data is not None:
1841 self._nodemap_docket = nodemap_data[0]
1841 self._nodemap_docket = nodemap_data[0]
1842 self.index.update_nodemap_data(*nodemap_data)
1842 self.index.update_nodemap_data(*nodemap_data)
1843
1843
1844 def rev(self, node):
1844 def rev(self, node):
1845 """return the revision number associated with a <nodeid>"""
1845 """return the revision number associated with a <nodeid>"""
1846 try:
1846 try:
1847 return self.index.rev(node)
1847 return self.index.rev(node)
1848 except TypeError:
1848 except TypeError:
1849 raise
1849 raise
1850 except error.RevlogError:
1850 except error.RevlogError:
1851 # parsers.c radix tree lookup failed
1851 # parsers.c radix tree lookup failed
1852 if (
1852 if (
1853 node == self.nodeconstants.wdirid
1853 node == self.nodeconstants.wdirid
1854 or node in self.nodeconstants.wdirfilenodeids
1854 or node in self.nodeconstants.wdirfilenodeids
1855 ):
1855 ):
1856 raise error.WdirUnsupported
1856 raise error.WdirUnsupported
1857 raise error.LookupError(node, self.display_id, _(b'no node'))
1857 raise error.LookupError(node, self.display_id, _(b'no node'))
1858
1858
1859 # Accessors for index entries.
1859 # Accessors for index entries.
1860
1860
1861 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
1861 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
1862 # are flags.
1862 # are flags.
1863 def start(self, rev):
1863 def start(self, rev):
1864 return int(self.index[rev][0] >> 16)
1864 return int(self.index[rev][0] >> 16)
1865
1865
1866 def sidedata_cut_off(self, rev):
1866 def sidedata_cut_off(self, rev):
1867 sd_cut_off = self.index[rev][8]
1867 sd_cut_off = self.index[rev][8]
1868 if sd_cut_off != 0:
1868 if sd_cut_off != 0:
1869 return sd_cut_off
1869 return sd_cut_off
1870 # This is some annoying dance, because entries without sidedata
1870 # This is some annoying dance, because entries without sidedata
1871 # currently use 0 as their ofsset. (instead of previous-offset +
1871 # currently use 0 as their offset. (instead of previous-offset +
1872 # currently use 0 as their offset. (instead of previous-offset +
1872 # previous-size)
1873 #
1873 #
1874 # We should reconsider this sidedata β†’ 0 sidata_offset policy.
1874 # We should reconsider this sidedata → 0 sidedata_offset policy.
1875 # We should reconsider this sidedata → 0 sidedata_offset policy.
1875 # In the meantime, we need this.
1876 while 0 <= rev:
1876 while 0 <= rev:
1877 e = self.index[rev]
1877 e = self.index[rev]
1878 if e[9] != 0:
1878 if e[9] != 0:
1879 return e[8] + e[9]
1879 return e[8] + e[9]
1880 rev -= 1
1880 rev -= 1
1881 return 0
1881 return 0
1882
1882
1883 def flags(self, rev):
1883 def flags(self, rev):
1884 return self.index[rev][0] & 0xFFFF
1884 return self.index[rev][0] & 0xFFFF
1885
1885
1886 def length(self, rev):
1886 def length(self, rev):
1887 return self.index[rev][1]
1887 return self.index[rev][1]
1888
1888
1889 def sidedata_length(self, rev):
1889 def sidedata_length(self, rev):
1890 if not self.feature_config.has_side_data:
1890 if not self.feature_config.has_side_data:
1891 return 0
1891 return 0
1892 return self.index[rev][9]
1892 return self.index[rev][9]
1893
1893
1894 def rawsize(self, rev):
1894 def rawsize(self, rev):
1895 """return the length of the uncompressed text for a given revision"""
1895 """return the length of the uncompressed text for a given revision"""
1896 l = self.index[rev][2]
1896 l = self.index[rev][2]
1897 if l >= 0:
1897 if l >= 0:
1898 return l
1898 return l
1899
1899
1900 t = self.rawdata(rev)
1900 t = self.rawdata(rev)
1901 return len(t)
1901 return len(t)
1902
1902
1903 def size(self, rev):
1903 def size(self, rev):
1904 """length of non-raw text (processed by a "read" flag processor)"""
1904 """length of non-raw text (processed by a "read" flag processor)"""
1905 # fast path: if no "read" flag processor could change the content,
1905 # fast path: if no "read" flag processor could change the content,
1906 # size is rawsize. note: ELLIPSIS is known to not change the content.
1906 # size is rawsize. note: ELLIPSIS is known to not change the content.
1907 flags = self.flags(rev)
1907 flags = self.flags(rev)
1908 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
1908 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
1909 return self.rawsize(rev)
1909 return self.rawsize(rev)
1910
1910
1911 return len(self.revision(rev))
1911 return len(self.revision(rev))
1912
1912
1913 def fast_rank(self, rev):
1913 def fast_rank(self, rev):
1914 """Return the rank of a revision if already known, or None otherwise.
1914 """Return the rank of a revision if already known, or None otherwise.
1915
1915
1916 The rank of a revision is the size of the sub-graph it defines as a
1916 The rank of a revision is the size of the sub-graph it defines as a
1917 head. Equivalently, the rank of a revision `r` is the size of the set
1917 head. Equivalently, the rank of a revision `r` is the size of the set
1918 `ancestors(r)`, `r` included.
1918 `ancestors(r)`, `r` included.
1919
1919
1920 This method returns the rank retrieved from the revlog in constant
1920 This method returns the rank retrieved from the revlog in constant
1921 time. It makes no attempt at computing unknown values for versions of
1921 time. It makes no attempt at computing unknown values for versions of
1922 the revlog which do not persist the rank.
1922 the revlog which do not persist the rank.
1923 """
1923 """
1924 rank = self.index[rev][ENTRY_RANK]
1924 rank = self.index[rev][ENTRY_RANK]
1925 if self._format_version != CHANGELOGV2 or rank == RANK_UNKNOWN:
1925 if self._format_version != CHANGELOGV2 or rank == RANK_UNKNOWN:
1926 return None
1926 return None
1927 if rev == nullrev:
1927 if rev == nullrev:
1928 return 0 # convention
1928 return 0 # convention
1929 return rank
1929 return rank
1930
1930
1931 def chainbase(self, rev):
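# --- editor's illustrative sketch (not part of the diff) ----------------------
# What "rank" means in fast_rank() above: the number of ancestors of `rev`,
# counting `rev` itself, so on a linear history 0 <- 1 <- 2 the rank of
# revision 2 is 3. When the format does not persist ranks, fast_rank()
# returns None and the value can be recomputed the slow way:

def rank(rl, rev):
    known = rl.fast_rank(rev)
    if known is not None:
        return known
    return sum(1 for _ in rl.ancestors([rev], inclusive=True))
# ------------------------------------------------------------------------------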
1931 def chainbase(self, rev):
1932 base = self._chainbasecache.get(rev)
1932 base = self._chainbasecache.get(rev)
1933 if base is not None:
1933 if base is not None:
1934 return base
1934 return base
1935
1935
1936 index = self.index
1936 index = self.index
1937 iterrev = rev
1937 iterrev = rev
1938 base = index[iterrev][3]
1938 base = index[iterrev][3]
1939 while base != iterrev:
1939 while base != iterrev:
1940 iterrev = base
1940 iterrev = base
1941 base = index[iterrev][3]
1941 base = index[iterrev][3]
1942
1942
1943 self._chainbasecache[rev] = base
1943 self._chainbasecache[rev] = base
1944 return base
1944 return base
1945
1945
1946 def linkrev(self, rev):
1946 def linkrev(self, rev):
1947 return self.index[rev][4]
1947 return self.index[rev][4]
1948
1948
1949 def parentrevs(self, rev):
1949 def parentrevs(self, rev):
1950 try:
1950 try:
1951 entry = self.index[rev]
1951 entry = self.index[rev]
1952 except IndexError:
1952 except IndexError:
1953 if rev == wdirrev:
1953 if rev == wdirrev:
1954 raise error.WdirUnsupported
1954 raise error.WdirUnsupported
1955 raise
1955 raise
1956
1956
1957 if self.feature_config.canonical_parent_order and entry[5] == nullrev:
1957 if self.feature_config.canonical_parent_order and entry[5] == nullrev:
1958 return entry[6], entry[5]
1958 return entry[6], entry[5]
1959 else:
1959 else:
1960 return entry[5], entry[6]
1960 return entry[5], entry[6]
1961
1961
1962 # fast parentrevs(rev) where rev isn't filtered
1962 # fast parentrevs(rev) where rev isn't filtered
1963 _uncheckedparentrevs = parentrevs
1963 _uncheckedparentrevs = parentrevs
1964
1964
1965 def node(self, rev):
1965 def node(self, rev):
1966 try:
1966 try:
1967 return self.index[rev][7]
1967 return self.index[rev][7]
1968 except IndexError:
1968 except IndexError:
1969 if rev == wdirrev:
1969 if rev == wdirrev:
1970 raise error.WdirUnsupported
1970 raise error.WdirUnsupported
1971 raise
1971 raise
1972
1972
1973 # Derived from index values.
1973 # Derived from index values.
1974
1974
1975 def end(self, rev):
1975 def end(self, rev):
1976 return self.start(rev) + self.length(rev)
1976 return self.start(rev) + self.length(rev)
1977
1977
1978 def parents(self, node):
1978 def parents(self, node):
1979 i = self.index
1979 i = self.index
1980 d = i[self.rev(node)]
1980 d = i[self.rev(node)]
1981 # inline node() to avoid function call overhead
1981 # inline node() to avoid function call overhead
1982 if self.feature_config.canonical_parent_order and d[5] == self.nullid:
1982 if self.feature_config.canonical_parent_order and d[5] == self.nullid:
1983 return i[d[6]][7], i[d[5]][7]
1983 return i[d[6]][7], i[d[5]][7]
1984 else:
1984 else:
1985 return i[d[5]][7], i[d[6]][7]
1985 return i[d[5]][7], i[d[6]][7]
1986
1986
1987 def chainlen(self, rev):
1987 def chainlen(self, rev):
1988 return self._chaininfo(rev)[0]
1988 return self._chaininfo(rev)[0]
1989
1989
1990 def _chaininfo(self, rev):
1990 def _chaininfo(self, rev):
1991 chaininfocache = self._chaininfocache
1991 chaininfocache = self._chaininfocache
1992 if rev in chaininfocache:
1992 if rev in chaininfocache:
1993 return chaininfocache[rev]
1993 return chaininfocache[rev]
1994 index = self.index
1994 index = self.index
1995 generaldelta = self.delta_config.general_delta
1995 generaldelta = self.delta_config.general_delta
1996 iterrev = rev
1996 iterrev = rev
1997 e = index[iterrev]
1997 e = index[iterrev]
1998 clen = 0
1998 clen = 0
1999 compresseddeltalen = 0
1999 compresseddeltalen = 0
2000 while iterrev != e[3]:
2000 while iterrev != e[3]:
2001 clen += 1
2001 clen += 1
2002 compresseddeltalen += e[1]
2002 compresseddeltalen += e[1]
2003 if generaldelta:
2003 if generaldelta:
2004 iterrev = e[3]
2004 iterrev = e[3]
2005 else:
2005 else:
2006 iterrev -= 1
2006 iterrev -= 1
2007 if iterrev in chaininfocache:
2007 if iterrev in chaininfocache:
2008 t = chaininfocache[iterrev]
2008 t = chaininfocache[iterrev]
2009 clen += t[0]
2009 clen += t[0]
2010 compresseddeltalen += t[1]
2010 compresseddeltalen += t[1]
2011 break
2011 break
2012 e = index[iterrev]
2012 e = index[iterrev]
2013 else:
2013 else:
2014 # Add text length of base since decompressing that also takes
2014 # Add text length of base since decompressing that also takes
2015 # work. For cache hits the length is already included.
2015 # work. For cache hits the length is already included.
2016 compresseddeltalen += e[1]
2016 compresseddeltalen += e[1]
2017 r = (clen, compresseddeltalen)
2017 r = (clen, compresseddeltalen)
2018 chaininfocache[rev] = r
2018 chaininfocache[rev] = r
2019 return r
2019 return r
2020
2020
2021 def _deltachain(self, rev, stoprev=None):
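# --- editor's illustrative sketch (not part of the diff) ----------------------
# chainlen()/_chaininfo() above measure how long a delta chain is. A small
# report built only on public-ish accessors, e.g. to spot pathological chains:

def longest_delta_chain(rl):
    best_len, best_rev = -1, None
    for rev in rl.revs():
        length = rl.chainlen(rev)
        if length > best_len:
            best_len, best_rev = length, rev
    return best_len, best_rev
# ------------------------------------------------------------------------------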
2021 def _deltachain(self, rev, stoprev=None):
2022 return self._inner._deltachain(rev, stoprev=stoprev)
2022 return self._inner._deltachain(rev, stoprev=stoprev)
2023
2023
2024 def ancestors(self, revs, stoprev=0, inclusive=False):
2024 def ancestors(self, revs, stoprev=0, inclusive=False):
2025 """Generate the ancestors of 'revs' in reverse revision order.
2025 """Generate the ancestors of 'revs' in reverse revision order.
2026 Does not generate revs lower than stoprev.
2026 Does not generate revs lower than stoprev.
2027
2027
2028 See the documentation for ancestor.lazyancestors for more details."""
2028 See the documentation for ancestor.lazyancestors for more details."""
2029
2029
2030 # first, make sure start revisions aren't filtered
2030 # first, make sure start revisions aren't filtered
2031 revs = list(revs)
2031 revs = list(revs)
2032 checkrev = self.node
2032 checkrev = self.node
2033 for r in revs:
2033 for r in revs:
2034 checkrev(r)
2034 checkrev(r)
2035 # and we're sure ancestors aren't filtered either
2035 # and we're sure ancestors aren't filtered either
2036
2036
2037 if rustancestor is not None and self.index.rust_ext_compat:
2037 if rustancestor is not None and self.index.rust_ext_compat:
2038 lazyancestors = rustancestor.LazyAncestors
2038 lazyancestors = rustancestor.LazyAncestors
2039 arg = self.index
2039 arg = self.index
2040 else:
2040 else:
2041 lazyancestors = ancestor.lazyancestors
2041 lazyancestors = ancestor.lazyancestors
2042 arg = self._uncheckedparentrevs
2042 arg = self._uncheckedparentrevs
2043 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
2043 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
2044
2044
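# --- illustrative sketch (not part of revlog.py) ---
# ``ancestors`` hands the walk off to a lazy generator (Rust-backed when
# available).  Its contract -- ancestors of ``revs`` in reverse revision
# order, never going below ``stoprev`` -- can be reproduced with a small
# max-heap walk; all names here are invented for the sketch:
import heapq

def lazy_ancestors(parentrevs, revs, stoprev=0, inclusive=False):
    if inclusive:
        pending = set(revs)
    else:
        pending = {p for r in revs for p in parentrevs(r) if p >= stoprev}
    seen = set(pending)
    heap = [-r for r in pending]
    heapq.heapify(heap)
    while heap:
        r = -heapq.heappop(heap)
        yield r
        for p in parentrevs(r):
            if p >= stoprev and p not in seen:
                seen.add(p)
                heapq.heappush(heap, -p)

parents = {0: (-1, -1), 1: (0, -1), 2: (1, -1), 3: (1, -1), 4: (2, 3)}
assert list(lazy_ancestors(lambda r: parents[r], [4])) == [3, 2, 1, 0]
# --- end of sketch ---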
2045 def descendants(self, revs):
2045 def descendants(self, revs):
2046 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
2046 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
2047
2047
2048 def findcommonmissing(self, common=None, heads=None):
2048 def findcommonmissing(self, common=None, heads=None):
2049 """Return a tuple of the ancestors of common and the ancestors of heads
2049 """Return a tuple of the ancestors of common and the ancestors of heads
2050 that are not ancestors of common. In revset terminology, we return the
2050 that are not ancestors of common. In revset terminology, we return the
2051 tuple:
2051 tuple:
2052
2052
2053 ::common, (::heads) - (::common)
2053 ::common, (::heads) - (::common)
2054
2054
2055 The list is sorted by revision number, meaning it is
2055 The list is sorted by revision number, meaning it is
2056 topologically sorted.
2056 topologically sorted.
2057
2057
2058 'heads' and 'common' are both lists of node IDs. If heads is
2058 'heads' and 'common' are both lists of node IDs. If heads is
2059 not supplied, uses all of the revlog's heads. If common is not
2059 not supplied, uses all of the revlog's heads. If common is not
2060 supplied, uses nullid."""
2060 supplied, uses nullid."""
2061 if common is None:
2061 if common is None:
2062 common = [self.nullid]
2062 common = [self.nullid]
2063 if heads is None:
2063 if heads is None:
2064 heads = self.heads()
2064 heads = self.heads()
2065
2065
2066 common = [self.rev(n) for n in common]
2066 common = [self.rev(n) for n in common]
2067 heads = [self.rev(n) for n in heads]
2067 heads = [self.rev(n) for n in heads]
2068
2068
2069 # we want the ancestors, but inclusive
2069 # we want the ancestors, but inclusive
2070 class lazyset:
2070 class lazyset:
2071 def __init__(self, lazyvalues):
2071 def __init__(self, lazyvalues):
2072 self.addedvalues = set()
2072 self.addedvalues = set()
2073 self.lazyvalues = lazyvalues
2073 self.lazyvalues = lazyvalues
2074
2074
2075 def __contains__(self, value):
2075 def __contains__(self, value):
2076 return value in self.addedvalues or value in self.lazyvalues
2076 return value in self.addedvalues or value in self.lazyvalues
2077
2077
2078 def __iter__(self):
2078 def __iter__(self):
2079 added = self.addedvalues
2079 added = self.addedvalues
2080 for r in added:
2080 for r in added:
2081 yield r
2081 yield r
2082 for r in self.lazyvalues:
2082 for r in self.lazyvalues:
2083 if r not in added:
2083 if r not in added:
2084 yield r
2084 yield r
2085
2085
2086 def add(self, value):
2086 def add(self, value):
2087 self.addedvalues.add(value)
2087 self.addedvalues.add(value)
2088
2088
2089 def update(self, values):
2089 def update(self, values):
2090 self.addedvalues.update(values)
2090 self.addedvalues.update(values)
2091
2091
2092 has = lazyset(self.ancestors(common))
2092 has = lazyset(self.ancestors(common))
2093 has.add(nullrev)
2093 has.add(nullrev)
2094 has.update(common)
2094 has.update(common)
2095
2095
2096 # take all ancestors from heads that aren't in has
2096 # take all ancestors from heads that aren't in has
2097 missing = set()
2097 missing = set()
2098 visit = collections.deque(r for r in heads if r not in has)
2098 visit = collections.deque(r for r in heads if r not in has)
2099 while visit:
2099 while visit:
2100 r = visit.popleft()
2100 r = visit.popleft()
2101 if r in missing:
2101 if r in missing:
2102 continue
2102 continue
2103 else:
2103 else:
2104 missing.add(r)
2104 missing.add(r)
2105 for p in self.parentrevs(r):
2105 for p in self.parentrevs(r):
2106 if p not in has:
2106 if p not in has:
2107 visit.append(p)
2107 visit.append(p)
2108 missing = list(missing)
2108 missing = list(missing)
2109 missing.sort()
2109 missing.sort()
2110 return has, [self.node(miss) for miss in missing]
2110 return has, [self.node(miss) for miss in missing]
2111
2111
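# --- illustrative sketch (not part of revlog.py) ---
# The "missing" computation above is a reverse walk from ``heads`` that
# stops whenever it reaches an ancestor of ``common``.  Here is the same
# idea on a toy DAG where ``parents[r]`` lists the parent revisions of r.
# All names below (``parents``, ``missing_revs``) are invented for the sketch.
import collections

def missing_revs(parents, common, heads):
    has = set(common)
    stack = list(common)
    while stack:                       # close ``has`` under ancestry
        r = stack.pop()
        for p in parents[r]:
            if p not in has:
                has.add(p)
                stack.append(p)
    missing = set()
    visit = collections.deque(r for r in heads if r not in has)
    while visit:
        r = visit.popleft()
        if r in missing:
            continue
        missing.add(r)
        for p in parents[r]:
            if p not in has:
                visit.append(p)
    return sorted(missing)

# 0 -- 1 -- 2 -- 4, with 3 branching off rev 1
parents = {0: [], 1: [0], 2: [1], 3: [1], 4: [2]}
assert missing_revs(parents, common=[2], heads=[3, 4]) == [3, 4]
# --- end of sketch ---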
2112 def incrementalmissingrevs(self, common=None):
2112 def incrementalmissingrevs(self, common=None):
2113 """Return an object that can be used to incrementally compute the
2113 """Return an object that can be used to incrementally compute the
2114 revision numbers of the ancestors of arbitrary sets that are not
2114 revision numbers of the ancestors of arbitrary sets that are not
2115 ancestors of common. This is an ancestor.incrementalmissingancestors
2115 ancestors of common. This is an ancestor.incrementalmissingancestors
2116 object.
2116 object.
2117
2117
2118 'common' is a list of revision numbers. If common is not supplied, uses
2118 'common' is a list of revision numbers. If common is not supplied, uses
2119 nullrev.
2119 nullrev.
2120 """
2120 """
2121 if common is None:
2121 if common is None:
2122 common = [nullrev]
2122 common = [nullrev]
2123
2123
2124 if rustancestor is not None and self.index.rust_ext_compat:
2124 if rustancestor is not None and self.index.rust_ext_compat:
2125 return rustancestor.MissingAncestors(self.index, common)
2125 return rustancestor.MissingAncestors(self.index, common)
2126 return ancestor.incrementalmissingancestors(self.parentrevs, common)
2126 return ancestor.incrementalmissingancestors(self.parentrevs, common)
2127
2127
2128 def findmissingrevs(self, common=None, heads=None):
2128 def findmissingrevs(self, common=None, heads=None):
2129 """Return the revision numbers of the ancestors of heads that
2129 """Return the revision numbers of the ancestors of heads that
2130 are not ancestors of common.
2130 are not ancestors of common.
2131
2131
2132 More specifically, return a list of revision numbers corresponding to
2132 More specifically, return a list of revision numbers corresponding to
2133 nodes N such that every N satisfies the following constraints:
2133 nodes N such that every N satisfies the following constraints:
2134
2134
2135 1. N is an ancestor of some node in 'heads'
2135 1. N is an ancestor of some node in 'heads'
2136 2. N is not an ancestor of any node in 'common'
2136 2. N is not an ancestor of any node in 'common'
2137
2137
2138 The list is sorted by revision number, meaning it is
2138 The list is sorted by revision number, meaning it is
2139 topologically sorted.
2139 topologically sorted.
2140
2140
2141 'heads' and 'common' are both lists of revision numbers. If heads is
2141 'heads' and 'common' are both lists of revision numbers. If heads is
2142 not supplied, uses all of the revlog's heads. If common is not
2142 not supplied, uses all of the revlog's heads. If common is not
2143 supplied, uses nullid."""
2143 supplied, uses nullid."""
2144 if common is None:
2144 if common is None:
2145 common = [nullrev]
2145 common = [nullrev]
2146 if heads is None:
2146 if heads is None:
2147 heads = self.headrevs()
2147 heads = self.headrevs()
2148
2148
2149 inc = self.incrementalmissingrevs(common=common)
2149 inc = self.incrementalmissingrevs(common=common)
2150 return inc.missingancestors(heads)
2150 return inc.missingancestors(heads)
2151
2151
2152 def findmissing(self, common=None, heads=None):
2152 def findmissing(self, common=None, heads=None):
2153 """Return the ancestors of heads that are not ancestors of common.
2153 """Return the ancestors of heads that are not ancestors of common.
2154
2154
2155 More specifically, return a list of nodes N such that every N
2155 More specifically, return a list of nodes N such that every N
2156 satisfies the following constraints:
2156 satisfies the following constraints:
2157
2157
2158 1. N is an ancestor of some node in 'heads'
2158 1. N is an ancestor of some node in 'heads'
2159 2. N is not an ancestor of any node in 'common'
2159 2. N is not an ancestor of any node in 'common'
2160
2160
2161 The list is sorted by revision number, meaning it is
2161 The list is sorted by revision number, meaning it is
2162 topologically sorted.
2162 topologically sorted.
2163
2163
2164 'heads' and 'common' are both lists of node IDs. If heads is
2164 'heads' and 'common' are both lists of node IDs. If heads is
2165 not supplied, uses all of the revlog's heads. If common is not
2165 not supplied, uses all of the revlog's heads. If common is not
2166 supplied, uses nullid."""
2166 supplied, uses nullid."""
2167 if common is None:
2167 if common is None:
2168 common = [self.nullid]
2168 common = [self.nullid]
2169 if heads is None:
2169 if heads is None:
2170 heads = self.heads()
2170 heads = self.heads()
2171
2171
2172 common = [self.rev(n) for n in common]
2172 common = [self.rev(n) for n in common]
2173 heads = [self.rev(n) for n in heads]
2173 heads = [self.rev(n) for n in heads]
2174
2174
2175 inc = self.incrementalmissingrevs(common=common)
2175 inc = self.incrementalmissingrevs(common=common)
2176 return [self.node(r) for r in inc.missingancestors(heads)]
2176 return [self.node(r) for r in inc.missingancestors(heads)]
2177
2177
2178 def nodesbetween(self, roots=None, heads=None):
2178 def nodesbetween(self, roots=None, heads=None):
2179 """Return a topological path from 'roots' to 'heads'.
2179 """Return a topological path from 'roots' to 'heads'.
2180
2180
2181 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
2181 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
2182 topologically sorted list of all nodes N that satisfy both of
2182 topologically sorted list of all nodes N that satisfy both of
2183 these constraints:
2183 these constraints:
2184
2184
2185 1. N is a descendant of some node in 'roots'
2185 1. N is a descendant of some node in 'roots'
2186 2. N is an ancestor of some node in 'heads'
2186 2. N is an ancestor of some node in 'heads'
2187
2187
2188 Every node is considered to be both a descendant and an ancestor
2188 Every node is considered to be both a descendant and an ancestor
2189 of itself, so every reachable node in 'roots' and 'heads' will be
2189 of itself, so every reachable node in 'roots' and 'heads' will be
2190 included in 'nodes'.
2190 included in 'nodes'.
2191
2191
2192 'outroots' is the list of reachable nodes in 'roots', i.e., the
2192 'outroots' is the list of reachable nodes in 'roots', i.e., the
2193 subset of 'roots' that is returned in 'nodes'. Likewise,
2193 subset of 'roots' that is returned in 'nodes'. Likewise,
2194 'outheads' is the subset of 'heads' that is also in 'nodes'.
2194 'outheads' is the subset of 'heads' that is also in 'nodes'.
2195
2195
2196 'roots' and 'heads' are both lists of node IDs. If 'roots' is
2196 'roots' and 'heads' are both lists of node IDs. If 'roots' is
2197 unspecified, uses nullid as the only root. If 'heads' is
2197 unspecified, uses nullid as the only root. If 'heads' is
2198 unspecified, uses list of all of the revlog's heads."""
2198 unspecified, uses list of all of the revlog's heads."""
2199 nonodes = ([], [], [])
2199 nonodes = ([], [], [])
2200 if roots is not None:
2200 if roots is not None:
2201 roots = list(roots)
2201 roots = list(roots)
2202 if not roots:
2202 if not roots:
2203 return nonodes
2203 return nonodes
2204 lowestrev = min([self.rev(n) for n in roots])
2204 lowestrev = min([self.rev(n) for n in roots])
2205 else:
2205 else:
2206 roots = [self.nullid] # Everybody's a descendant of nullid
2206 roots = [self.nullid] # Everybody's a descendant of nullid
2207 lowestrev = nullrev
2207 lowestrev = nullrev
2208 if (lowestrev == nullrev) and (heads is None):
2208 if (lowestrev == nullrev) and (heads is None):
2209 # We want _all_ the nodes!
2209 # We want _all_ the nodes!
2210 return (
2210 return (
2211 [self.node(r) for r in self],
2211 [self.node(r) for r in self],
2212 [self.nullid],
2212 [self.nullid],
2213 list(self.heads()),
2213 list(self.heads()),
2214 )
2214 )
2215 if heads is None:
2215 if heads is None:
2216 # All nodes are ancestors, so the latest ancestor is the last
2216 # All nodes are ancestors, so the latest ancestor is the last
2217 # node.
2217 # node.
2218 highestrev = len(self) - 1
2218 highestrev = len(self) - 1
2219 # Set ancestors to None to signal that every node is an ancestor.
2219 # Set ancestors to None to signal that every node is an ancestor.
2220 ancestors = None
2220 ancestors = None
2221 # Set heads to an empty dictionary for later discovery of heads
2221 # Set heads to an empty dictionary for later discovery of heads
2222 heads = {}
2222 heads = {}
2223 else:
2223 else:
2224 heads = list(heads)
2224 heads = list(heads)
2225 if not heads:
2225 if not heads:
2226 return nonodes
2226 return nonodes
2227 ancestors = set()
2227 ancestors = set()
2228 # Turn heads into a dictionary so we can remove 'fake' heads.
2228 # Turn heads into a dictionary so we can remove 'fake' heads.
2229 # Also, later we will be using it to filter out the heads we can't
2229 # Also, later we will be using it to filter out the heads we can't
2230 # find from roots.
2230 # find from roots.
2231 heads = dict.fromkeys(heads, False)
2231 heads = dict.fromkeys(heads, False)
2232 # Start at the top and keep marking parents until we're done.
2232 # Start at the top and keep marking parents until we're done.
2233 nodestotag = set(heads)
2233 nodestotag = set(heads)
2234 # Remember where the top was so we can use it as a limit later.
2234 # Remember where the top was so we can use it as a limit later.
2235 highestrev = max([self.rev(n) for n in nodestotag])
2235 highestrev = max([self.rev(n) for n in nodestotag])
2236 while nodestotag:
2236 while nodestotag:
2237 # grab a node to tag
2237 # grab a node to tag
2238 n = nodestotag.pop()
2238 n = nodestotag.pop()
2239 # Never tag nullid
2239 # Never tag nullid
2240 if n == self.nullid:
2240 if n == self.nullid:
2241 continue
2241 continue
2242 # A node's revision number represents its place in a
2242 # A node's revision number represents its place in a
2243 # topologically sorted list of nodes.
2243 # topologically sorted list of nodes.
2244 r = self.rev(n)
2244 r = self.rev(n)
2245 if r >= lowestrev:
2245 if r >= lowestrev:
2246 if n not in ancestors:
2246 if n not in ancestors:
2247 # If we are possibly a descendant of one of the roots
2247 # If we are possibly a descendant of one of the roots
2248 # and we haven't already been marked as an ancestor
2248 # and we haven't already been marked as an ancestor
2249 ancestors.add(n) # Mark as ancestor
2249 ancestors.add(n) # Mark as ancestor
2250 # Add non-nullid parents to list of nodes to tag.
2250 # Add non-nullid parents to list of nodes to tag.
2251 nodestotag.update(
2251 nodestotag.update(
2252 [p for p in self.parents(n) if p != self.nullid]
2252 [p for p in self.parents(n) if p != self.nullid]
2253 )
2253 )
2254 elif n in heads: # We've seen it before, is it a fake head?
2254 elif n in heads: # We've seen it before, is it a fake head?
2255 # So it is; real heads should not be the ancestors of
2255 # So it is; real heads should not be the ancestors of
2256 # any other heads.
2256 # any other heads.
2257 heads.pop(n)
2257 heads.pop(n)
2258 if not ancestors:
2258 if not ancestors:
2259 return nonodes
2259 return nonodes
2260 # Now that we have our set of ancestors, we want to remove any
2260 # Now that we have our set of ancestors, we want to remove any
2261 # roots that are not ancestors.
2261 # roots that are not ancestors.
2262
2262
2263 # If one of the roots was nullid, everything is included anyway.
2263 # If one of the roots was nullid, everything is included anyway.
2264 if lowestrev > nullrev:
2264 if lowestrev > nullrev:
2265 # But, since we weren't, let's recompute the lowest rev to not
2265 # But, since we weren't, let's recompute the lowest rev to not
2266 # include roots that aren't ancestors.
2266 # include roots that aren't ancestors.
2267
2267
2268 # Filter out roots that aren't ancestors of heads
2268 # Filter out roots that aren't ancestors of heads
2269 roots = [root for root in roots if root in ancestors]
2269 roots = [root for root in roots if root in ancestors]
2270 # Recompute the lowest revision
2270 # Recompute the lowest revision
2271 if roots:
2271 if roots:
2272 lowestrev = min([self.rev(root) for root in roots])
2272 lowestrev = min([self.rev(root) for root in roots])
2273 else:
2273 else:
2274 # No more roots? Return empty list
2274 # No more roots? Return empty list
2275 return nonodes
2275 return nonodes
2276 else:
2276 else:
2277 # We are descending from nullid, and don't need to care about
2277 # We are descending from nullid, and don't need to care about
2278 # any other roots.
2278 # any other roots.
2279 lowestrev = nullrev
2279 lowestrev = nullrev
2280 roots = [self.nullid]
2280 roots = [self.nullid]
2281 # Transform our roots list into a set.
2281 # Transform our roots list into a set.
2282 descendants = set(roots)
2282 descendants = set(roots)
2283 # Also, keep the original roots so we can filter out roots that aren't
2283 # Also, keep the original roots so we can filter out roots that aren't
2284 # 'real' roots (i.e. are descended from other roots).
2284 # 'real' roots (i.e. are descended from other roots).
2285 roots = descendants.copy()
2285 roots = descendants.copy()
2286 # Our topologically sorted list of output nodes.
2286 # Our topologically sorted list of output nodes.
2287 orderedout = []
2287 orderedout = []
2288 # Don't start at nullid since we don't want nullid in our output list,
2288 # Don't start at nullid since we don't want nullid in our output list,
2289 # and if nullid shows up in descendants, empty parents will look like
2289 # and if nullid shows up in descendants, empty parents will look like
2290 # they're descendants.
2290 # they're descendants.
2291 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
2291 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
2292 n = self.node(r)
2292 n = self.node(r)
2293 isdescendant = False
2293 isdescendant = False
2294 if lowestrev == nullrev: # Everybody is a descendant of nullid
2294 if lowestrev == nullrev: # Everybody is a descendant of nullid
2295 isdescendant = True
2295 isdescendant = True
2296 elif n in descendants:
2296 elif n in descendants:
2297 # n is already a descendant
2297 # n is already a descendant
2298 isdescendant = True
2298 isdescendant = True
2299 # This check only needs to be done here because all the roots
2299 # This check only needs to be done here because all the roots
2300 # will start being marked as descendants before the loop.
2300 # will start being marked as descendants before the loop.
2301 if n in roots:
2301 if n in roots:
2302 # If n was a root, check if it's a 'real' root.
2302 # If n was a root, check if it's a 'real' root.
2303 p = tuple(self.parents(n))
2303 p = tuple(self.parents(n))
2304 # If any of its parents are descendants, it's not a root.
2304 # If any of its parents are descendants, it's not a root.
2305 if (p[0] in descendants) or (p[1] in descendants):
2305 if (p[0] in descendants) or (p[1] in descendants):
2306 roots.remove(n)
2306 roots.remove(n)
2307 else:
2307 else:
2308 p = tuple(self.parents(n))
2308 p = tuple(self.parents(n))
2309 # A node is a descendant if either of its parents is a
2309 # A node is a descendant if either of its parents is a
2310 # descendant. (We seeded the descendants set with the roots
2310 # descendant. (We seeded the descendants set with the roots
2311 # up there, remember?)
2311 # up there, remember?)
2312 if (p[0] in descendants) or (p[1] in descendants):
2312 if (p[0] in descendants) or (p[1] in descendants):
2313 descendants.add(n)
2313 descendants.add(n)
2314 isdescendant = True
2314 isdescendant = True
2315 if isdescendant and ((ancestors is None) or (n in ancestors)):
2315 if isdescendant and ((ancestors is None) or (n in ancestors)):
2316 # Only include nodes that are both descendants and ancestors.
2316 # Only include nodes that are both descendants and ancestors.
2317 orderedout.append(n)
2317 orderedout.append(n)
2318 if (ancestors is not None) and (n in heads):
2318 if (ancestors is not None) and (n in heads):
2319 # We're trying to figure out which heads are reachable
2319 # We're trying to figure out which heads are reachable
2320 # from roots.
2320 # from roots.
2321 # Mark this head as having been reached
2321 # Mark this head as having been reached
2322 heads[n] = True
2322 heads[n] = True
2323 elif ancestors is None:
2323 elif ancestors is None:
2324 # Otherwise, we're trying to discover the heads.
2324 # Otherwise, we're trying to discover the heads.
2325 # Assume this is a head because if it isn't, the next step
2325 # Assume this is a head because if it isn't, the next step
2326 # will eventually remove it.
2326 # will eventually remove it.
2327 heads[n] = True
2327 heads[n] = True
2328 # But, obviously its parents aren't.
2328 # But, obviously its parents aren't.
2329 for p in self.parents(n):
2329 for p in self.parents(n):
2330 heads.pop(p, None)
2330 heads.pop(p, None)
2331 heads = [head for head, flag in heads.items() if flag]
2331 heads = [head for head, flag in heads.items() if flag]
2332 roots = list(roots)
2332 roots = list(roots)
2333 assert orderedout
2333 assert orderedout
2334 assert roots
2334 assert roots
2335 assert heads
2335 assert heads
2336 return (orderedout, roots, heads)
2336 return (orderedout, roots, heads)
2337
2337
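# --- illustrative sketch (not part of revlog.py) ---
# ``nodesbetween`` computes roots::heads, i.e. revisions that are both
# descendants of some root and ancestors of some head.  A compact way to
# express the same set on a toy DAG (names ``parents`` and ``between``
# are invented; the real method also reports the reachable roots and
# heads and works on node ids rather than rev numbers):

def between(parents, roots, heads):
    # ancestors of heads (inclusive)
    anc = set()
    stack = list(heads)
    while stack:
        r = stack.pop()
        if r not in anc:
            anc.add(r)
            stack.extend(parents[r])
    # descendants of roots (inclusive), scanning in topological order
    desc = set(roots)
    for r in sorted(parents):
        if any(p in desc for p in parents[r]):
            desc.add(r)
    return sorted(anc & desc)

parents = {0: [], 1: [0], 2: [1], 3: [1], 4: [2, 3]}
assert between(parents, roots=[1], heads=[4]) == [1, 2, 3, 4]
# --- end of sketch ---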
2338 def headrevs(self, revs=None):
2338 def headrevs(self, revs=None):
2339 if revs is None:
2339 if revs is None:
2340 try:
2340 try:
2341 return self.index.headrevs()
2341 return self.index.headrevs()
2342 except AttributeError:
2342 except AttributeError:
2343 return self._headrevs()
2343 return self._headrevs()
2344 if rustdagop is not None and self.index.rust_ext_compat:
2344 if rustdagop is not None and self.index.rust_ext_compat:
2345 return rustdagop.headrevs(self.index, revs)
2345 return rustdagop.headrevs(self.index, revs)
2346 return dagop.headrevs(revs, self._uncheckedparentrevs)
2346 return dagop.headrevs(revs, self._uncheckedparentrevs)
2347
2347
2348 def headrevsdiff(self, start, stop):
2349 try:
2350 return self.index.headrevsdiff(start, stop)
2351 except AttributeError:
2352 return dagop.headrevsdiff(self._uncheckedparentrevs, start, stop)
2353
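# --- illustrative sketch (not part of revlog.py) ---
# The newly added ``headrevsdiff`` asks the index which heads were removed
# and which were added between two states of the revlog (revs below
# ``start`` versus revs below ``stop``), falling back to a dagop helper
# when the index has no fast implementation.  A naive fallback can simply
# compute both head sets and diff them; everything below (``heads_below``,
# ``head_diff``, the half-open boundary convention) is an assumption made
# for this sketch, not the real dagop code.

def heads_below(parents, stop):
    """Heads of the sub-graph made of revs 0..stop-1."""
    ishead = [True] * stop
    for r in range(stop):
        for p in parents[r]:
            if p >= 0:
                ishead[p] = False
    return {r for r in range(stop) if ishead[r]}

def head_diff(parents, start, stop):
    before, after = heads_below(parents, start), heads_below(parents, stop)
    return sorted(before - after), sorted(after - before)

# linear history 0-1-2 plus a new branch 3 forking off rev 1
parents = {0: [-1], 1: [0], 2: [1], 3: [1]}
assert head_diff(parents, start=3, stop=4) == ([], [3])
# --- end of sketch ---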
2348 def computephases(self, roots):
2354 def computephases(self, roots):
2349 return self.index.computephasesmapsets(roots)
2355 return self.index.computephasesmapsets(roots)
2350
2356
2351 def _headrevs(self):
2357 def _headrevs(self):
2352 count = len(self)
2358 count = len(self)
2353 if not count:
2359 if not count:
2354 return [nullrev]
2360 return [nullrev]
2355 # we won't iterate over filtered revs so nobody is a head at start
2361 # we won't iterate over filtered revs so nobody is a head at start
2356 ishead = [0] * (count + 1)
2362 ishead = [0] * (count + 1)
2357 index = self.index
2363 index = self.index
2358 for r in self:
2364 for r in self:
2359 ishead[r] = 1 # I may be a head
2365 ishead[r] = 1 # I may be a head
2360 e = index[r]
2366 e = index[r]
2361 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
2367 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
2362 return [r for r, val in enumerate(ishead) if val]
2368 return [r for r, val in enumerate(ishead) if val]
2363
2369
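# --- illustrative sketch (not part of revlog.py) ---
# The pure-Python fallback above sizes ``ishead`` as ``count + 1`` so that
# a ``nullrev`` (-1) parent simply clears the extra trailing slot instead
# of raising.  A compressed demonstration of that trick (the
# ``parentrevs`` list and ``head_revs`` name are invented for the sketch):

def head_revs(parentrevs):
    count = len(parentrevs)
    ishead = [1] * (count + 1)          # slot ``count`` absorbs index -1
    for p1, p2 in parentrevs:
        ishead[p1] = ishead[p2] = 0
    return [r for r in range(count) if ishead[r]]

# two heads: rev 2 (tip of 0-1-2) and rev 3 (forked off rev 1)
assert head_revs([(-1, -1), (0, -1), (1, -1), (1, -1)]) == [2, 3]
# --- end of sketch ---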
2364 def _head_node_ids(self):
2370 def _head_node_ids(self):
2365 try:
2371 try:
2366 return self.index.head_node_ids()
2372 return self.index.head_node_ids()
2367 except AttributeError:
2373 except AttributeError:
2368 return [self.node(r) for r in self.headrevs()]
2374 return [self.node(r) for r in self.headrevs()]
2369
2375
2370 def heads(self, start=None, stop=None):
2376 def heads(self, start=None, stop=None):
2371 """return the list of all nodes that have no children
2377 """return the list of all nodes that have no children
2372
2378
2373 if start is specified, only heads that are descendants of
2379 if start is specified, only heads that are descendants of
2374 start will be returned
2380 start will be returned
2375 if stop is specified, it will consider all the revs from stop
2381 if stop is specified, it will consider all the revs from stop
2376 as if they had no children
2382 as if they had no children
2377 """
2383 """
2378 if start is None and stop is None:
2384 if start is None and stop is None:
2379 if not len(self):
2385 if not len(self):
2380 return [self.nullid]
2386 return [self.nullid]
2381 return self._head_node_ids()
2387 return self._head_node_ids()
2382 if start is None:
2388 if start is None:
2383 start = nullrev
2389 start = nullrev
2384 else:
2390 else:
2385 start = self.rev(start)
2391 start = self.rev(start)
2386
2392
2387 stoprevs = {self.rev(n) for n in stop or []}
2393 stoprevs = {self.rev(n) for n in stop or []}
2388
2394
2389 revs = dagop.headrevssubset(
2395 revs = dagop.headrevssubset(
2390 self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
2396 self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
2391 )
2397 )
2392
2398
2393 return [self.node(rev) for rev in revs]
2399 return [self.node(rev) for rev in revs]
2394
2400
2401 def diffheads(self, start, stop):
2402 """return the nodes that make up the difference between
2403 heads of revs before `start` and heads of revs before `stop`"""
2404 removed, added = self.headrevsdiff(start, stop)
2405 return [self.node(r) for r in removed], [self.node(r) for r in added]
2406
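# --- illustrative sketch (not part of revlog.py) ---
# ``diffheads`` is the node-level companion of ``headrevsdiff``: a caller
# that records ``len(rl)`` before adding revisions can afterwards ask
# which head nodes disappeared and which appeared, without enumerating
# every head of the repository twice.  A toy, self-contained version
# (all names invented; ``fake_headrevsdiff`` stands in for the
# index/dagop computation):

def diff_heads(toy_nodes, headrevsdiff, start, stop):
    removed, added = headrevsdiff(start, stop)
    return [toy_nodes[r] for r in removed], [toy_nodes[r] for r in added]

toy_nodes = [b'n0', b'n1', b'n2', b'n3']
# pretend the index already knows: rev 2 stopped being a head, rev 3 became one
fake_headrevsdiff = lambda start, stop: ([2], [3])
assert diff_heads(toy_nodes, fake_headrevsdiff, 3, 4) == ([b'n2'], [b'n3'])
# --- end of sketch ---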
2395 def children(self, node):
2407 def children(self, node):
2396 """find the children of a given node"""
2408 """find the children of a given node"""
2397 c = []
2409 c = []
2398 p = self.rev(node)
2410 p = self.rev(node)
2399 for r in self.revs(start=p + 1):
2411 for r in self.revs(start=p + 1):
2400 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
2412 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
2401 if prevs:
2413 if prevs:
2402 for pr in prevs:
2414 for pr in prevs:
2403 if pr == p:
2415 if pr == p:
2404 c.append(self.node(r))
2416 c.append(self.node(r))
2405 elif p == nullrev:
2417 elif p == nullrev:
2406 c.append(self.node(r))
2418 c.append(self.node(r))
2407 return c
2419 return c
2408
2420
2409 def commonancestorsheads(self, a, b):
2421 def commonancestorsheads(self, a, b):
2410 """calculate all the heads of the common ancestors of nodes a and b"""
2422 """calculate all the heads of the common ancestors of nodes a and b"""
2411 a, b = self.rev(a), self.rev(b)
2423 a, b = self.rev(a), self.rev(b)
2412 ancs = self._commonancestorsheads(a, b)
2424 ancs = self._commonancestorsheads(a, b)
2413 return pycompat.maplist(self.node, ancs)
2425 return pycompat.maplist(self.node, ancs)
2414
2426
2415 def _commonancestorsheads(self, *revs):
2427 def _commonancestorsheads(self, *revs):
2416 """calculate all the heads of the common ancestors of revs"""
2428 """calculate all the heads of the common ancestors of revs"""
2417 try:
2429 try:
2418 ancs = self.index.commonancestorsheads(*revs)
2430 ancs = self.index.commonancestorsheads(*revs)
2419 except (AttributeError, OverflowError): # C implementation failed
2431 except (AttributeError, OverflowError): # C implementation failed
2420 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
2432 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
2421 return ancs
2433 return ancs
2422
2434
2423 def isancestor(self, a, b):
2435 def isancestor(self, a, b):
2424 """return True if node a is an ancestor of node b
2436 """return True if node a is an ancestor of node b
2425
2437
2426 A revision is considered an ancestor of itself."""
2438 A revision is considered an ancestor of itself."""
2427 a, b = self.rev(a), self.rev(b)
2439 a, b = self.rev(a), self.rev(b)
2428 return self.isancestorrev(a, b)
2440 return self.isancestorrev(a, b)
2429
2441
2430 def isancestorrev(self, a, b):
2442 def isancestorrev(self, a, b):
2431 """return True if revision a is an ancestor of revision b
2443 """return True if revision a is an ancestor of revision b
2432
2444
2433 A revision is considered an ancestor of itself.
2445 A revision is considered an ancestor of itself.
2434
2446
2435 The implementation of this is trivial but the use of
2447 The implementation of this is trivial but the use of
2436 reachableroots is not."""
2448 reachableroots is not."""
2437 if a == nullrev:
2449 if a == nullrev:
2438 return True
2450 return True
2439 elif a == b:
2451 elif a == b:
2440 return True
2452 return True
2441 elif a > b:
2453 elif a > b:
2442 return False
2454 return False
2443 return bool(self.reachableroots(a, [b], [a], includepath=False))
2455 return bool(self.reachableroots(a, [b], [a], includepath=False))
2444
2456
2445 def reachableroots(self, minroot, heads, roots, includepath=False):
2457 def reachableroots(self, minroot, heads, roots, includepath=False):
2446 """return (heads(::(<roots> and <roots>::<heads>)))
2458 """return (heads(::(<roots> and <roots>::<heads>)))
2447
2459
2448 If includepath is True, return (<roots>::<heads>)."""
2460 If includepath is True, return (<roots>::<heads>)."""
2449 try:
2461 try:
2450 return self.index.reachableroots2(
2462 return self.index.reachableroots2(
2451 minroot, heads, roots, includepath
2463 minroot, heads, roots, includepath
2452 )
2464 )
2453 except AttributeError:
2465 except AttributeError:
2454 return dagop._reachablerootspure(
2466 return dagop._reachablerootspure(
2455 self.parentrevs, minroot, roots, heads, includepath
2467 self.parentrevs, minroot, roots, heads, includepath
2456 )
2468 )
2457
2469
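# --- illustrative sketch (not part of revlog.py) ---
# ``isancestorrev`` above reduces "is a an ancestor of b?" to a
# ``reachableroots`` query with ``minroot=a``: walking b's ancestry never
# needs to descend below rev ``a``, because revision numbers only grow in
# the child direction.  A minimal version of that pruned walk
# (``parentrevs`` and ``is_ancestor_rev`` are invented stand-ins):

def is_ancestor_rev(parentrevs, a, b):
    if a == b:
        return True
    seen = set()
    stack = [b]
    while stack:
        r = stack.pop()
        for p in parentrevs(r):
            if p == a:
                return True
            if p > a and p not in seen:   # prune everything below ``a``
                seen.add(p)
                stack.append(p)
    return False

parents = {0: (-1, -1), 1: (0, -1), 2: (1, -1), 3: (1, -1), 4: (2, 3)}
assert is_ancestor_rev(lambda r: parents[r], 1, 4)
assert not is_ancestor_rev(lambda r: parents[r], 3, 2)
# --- end of sketch ---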
2458 def ancestor(self, a, b):
2470 def ancestor(self, a, b):
2459 """calculate the "best" common ancestor of nodes a and b"""
2471 """calculate the "best" common ancestor of nodes a and b"""
2460
2472
2461 a, b = self.rev(a), self.rev(b)
2473 a, b = self.rev(a), self.rev(b)
2462 try:
2474 try:
2463 ancs = self.index.ancestors(a, b)
2475 ancs = self.index.ancestors(a, b)
2464 except (AttributeError, OverflowError):
2476 except (AttributeError, OverflowError):
2465 ancs = ancestor.ancestors(self.parentrevs, a, b)
2477 ancs = ancestor.ancestors(self.parentrevs, a, b)
2466 if ancs:
2478 if ancs:
2467 # choose a consistent winner when there's a tie
2479 # choose a consistent winner when there's a tie
2468 return min(map(self.node, ancs))
2480 return min(map(self.node, ancs))
2469 return self.nullid
2481 return self.nullid
2470
2482
2471 def _match(self, id):
2483 def _match(self, id):
2472 if isinstance(id, int):
2484 if isinstance(id, int):
2473 # rev
2485 # rev
2474 return self.node(id)
2486 return self.node(id)
2475 if len(id) == self.nodeconstants.nodelen:
2487 if len(id) == self.nodeconstants.nodelen:
2476 # possibly a binary node
2488 # possibly a binary node
2477 # odds of a binary node being all hex in ASCII are 1 in 10**25
2489 # odds of a binary node being all hex in ASCII are 1 in 10**25
2478 try:
2490 try:
2479 node = id
2491 node = id
2480 self.rev(node) # quick search the index
2492 self.rev(node) # quick search the index
2481 return node
2493 return node
2482 except error.LookupError:
2494 except error.LookupError:
2483 pass # may be partial hex id
2495 pass # may be partial hex id
2484 try:
2496 try:
2485 # str(rev)
2497 # str(rev)
2486 rev = int(id)
2498 rev = int(id)
2487 if b"%d" % rev != id:
2499 if b"%d" % rev != id:
2488 raise ValueError
2500 raise ValueError
2489 if rev < 0:
2501 if rev < 0:
2490 rev = len(self) + rev
2502 rev = len(self) + rev
2491 if rev < 0 or rev >= len(self):
2503 if rev < 0 or rev >= len(self):
2492 raise ValueError
2504 raise ValueError
2493 return self.node(rev)
2505 return self.node(rev)
2494 except (ValueError, OverflowError):
2506 except (ValueError, OverflowError):
2495 pass
2507 pass
2496 if len(id) == 2 * self.nodeconstants.nodelen:
2508 if len(id) == 2 * self.nodeconstants.nodelen:
2497 try:
2509 try:
2498 # a full hex nodeid?
2510 # a full hex nodeid?
2499 node = bin(id)
2511 node = bin(id)
2500 self.rev(node)
2512 self.rev(node)
2501 return node
2513 return node
2502 except (binascii.Error, error.LookupError):
2514 except (binascii.Error, error.LookupError):
2503 pass
2515 pass
2504
2516
2505 def _partialmatch(self, id):
2517 def _partialmatch(self, id):
2506 # we don't care about wdirfilenodeids as they should always be full hashes
2518 # we don't care about wdirfilenodeids as they should always be full hashes
2507 maybewdir = self.nodeconstants.wdirhex.startswith(id)
2519 maybewdir = self.nodeconstants.wdirhex.startswith(id)
2508 ambiguous = False
2520 ambiguous = False
2509 try:
2521 try:
2510 partial = self.index.partialmatch(id)
2522 partial = self.index.partialmatch(id)
2511 if partial and self.hasnode(partial):
2523 if partial and self.hasnode(partial):
2512 if maybewdir:
2524 if maybewdir:
2513 # single 'ff...' match in radix tree, ambiguous with wdir
2525 # single 'ff...' match in radix tree, ambiguous with wdir
2514 ambiguous = True
2526 ambiguous = True
2515 else:
2527 else:
2516 return partial
2528 return partial
2517 elif maybewdir:
2529 elif maybewdir:
2518 # no 'ff...' match in radix tree, wdir identified
2530 # no 'ff...' match in radix tree, wdir identified
2519 raise error.WdirUnsupported
2531 raise error.WdirUnsupported
2520 else:
2532 else:
2521 return None
2533 return None
2522 except error.RevlogError:
2534 except error.RevlogError:
2523 # parsers.c radix tree lookup gave multiple matches
2535 # parsers.c radix tree lookup gave multiple matches
2524 # fast path: for unfiltered changelog, radix tree is accurate
2536 # fast path: for unfiltered changelog, radix tree is accurate
2525 if not getattr(self, 'filteredrevs', None):
2537 if not getattr(self, 'filteredrevs', None):
2526 ambiguous = True
2538 ambiguous = True
2527 # fall through to slow path that filters hidden revisions
2539 # fall through to slow path that filters hidden revisions
2528 except (AttributeError, ValueError):
2540 except (AttributeError, ValueError):
2529 # we are pure python, or key is not hex
2541 # we are pure python, or key is not hex
2530 pass
2542 pass
2531 if ambiguous:
2543 if ambiguous:
2532 raise error.AmbiguousPrefixLookupError(
2544 raise error.AmbiguousPrefixLookupError(
2533 id, self.display_id, _(b'ambiguous identifier')
2545 id, self.display_id, _(b'ambiguous identifier')
2534 )
2546 )
2535
2547
2536 if id in self._pcache:
2548 if id in self._pcache:
2537 return self._pcache[id]
2549 return self._pcache[id]
2538
2550
2539 if len(id) <= 40:
2551 if len(id) <= 40:
2540 # hex(node)[:...]
2552 # hex(node)[:...]
2541 l = len(id) // 2 * 2 # grab an even number of digits
2553 l = len(id) // 2 * 2 # grab an even number of digits
2542 try:
2554 try:
2543 # we're dropping the last digit, so let's check that it's hex,
2555 # we're dropping the last digit, so let's check that it's hex,
2544 # to avoid the expensive computation below if it's not
2556 # to avoid the expensive computation below if it's not
2545 if len(id) % 2 > 0:
2557 if len(id) % 2 > 0:
2546 if not (id[-1] in hexdigits):
2558 if not (id[-1] in hexdigits):
2547 return None
2559 return None
2548 prefix = bin(id[:l])
2560 prefix = bin(id[:l])
2549 except binascii.Error:
2561 except binascii.Error:
2550 pass
2562 pass
2551 else:
2563 else:
2552 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
2564 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
2553 nl = [
2565 nl = [
2554 n for n in nl if hex(n).startswith(id) and self.hasnode(n)
2566 n for n in nl if hex(n).startswith(id) and self.hasnode(n)
2555 ]
2567 ]
2556 if self.nodeconstants.nullhex.startswith(id):
2568 if self.nodeconstants.nullhex.startswith(id):
2557 nl.append(self.nullid)
2569 nl.append(self.nullid)
2558 if len(nl) > 0:
2570 if len(nl) > 0:
2559 if len(nl) == 1 and not maybewdir:
2571 if len(nl) == 1 and not maybewdir:
2560 self._pcache[id] = nl[0]
2572 self._pcache[id] = nl[0]
2561 return nl[0]
2573 return nl[0]
2562 raise error.AmbiguousPrefixLookupError(
2574 raise error.AmbiguousPrefixLookupError(
2563 id, self.display_id, _(b'ambiguous identifier')
2575 id, self.display_id, _(b'ambiguous identifier')
2564 )
2576 )
2565 if maybewdir:
2577 if maybewdir:
2566 raise error.WdirUnsupported
2578 raise error.WdirUnsupported
2567 return None
2579 return None
2568
2580
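# --- illustrative sketch (not part of revlog.py) ---
# The slow path of ``_partialmatch`` is essentially "scan every node,
# keep the ones whose hex form starts with the prefix, and insist on a
# unique survivor".  The sketch below reproduces that contract on plain
# hex strings (all names invented; the real code also handles the
# virtual working-directory id and filtered revisions):

def partial_match(hexnodes, prefix):
    matches = [n for n in hexnodes if n.startswith(prefix)]
    if len(matches) > 1:
        raise LookupError(b'ambiguous identifier: %s' % prefix)
    return matches[0] if matches else None

nodes = [b'a1b2c3', b'a1f409', b'9d8e77']
assert partial_match(nodes, b'9d') == b'9d8e77'
assert partial_match(nodes, b'ff') is None
try:
    partial_match(nodes, b'a1')
    raise AssertionError('expected an ambiguity error')
except LookupError:
    pass
# --- end of sketch ---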
2569 def lookup(self, id):
2581 def lookup(self, id):
2570 """locate a node based on:
2582 """locate a node based on:
2571 - revision number or str(revision number)
2583 - revision number or str(revision number)
2572 - nodeid or subset of hex nodeid
2584 - nodeid or subset of hex nodeid
2573 """
2585 """
2574 n = self._match(id)
2586 n = self._match(id)
2575 if n is not None:
2587 if n is not None:
2576 return n
2588 return n
2577 n = self._partialmatch(id)
2589 n = self._partialmatch(id)
2578 if n:
2590 if n:
2579 return n
2591 return n
2580
2592
2581 raise error.LookupError(id, self.display_id, _(b'no match found'))
2593 raise error.LookupError(id, self.display_id, _(b'no match found'))
2582
2594
2583 def shortest(self, node, minlength=1):
2595 def shortest(self, node, minlength=1):
2584 """Find the shortest unambiguous prefix that matches node."""
2596 """Find the shortest unambiguous prefix that matches node."""
2585
2597
2586 def isvalid(prefix):
2598 def isvalid(prefix):
2587 try:
2599 try:
2588 matchednode = self._partialmatch(prefix)
2600 matchednode = self._partialmatch(prefix)
2589 except error.AmbiguousPrefixLookupError:
2601 except error.AmbiguousPrefixLookupError:
2590 return False
2602 return False
2591 except error.WdirUnsupported:
2603 except error.WdirUnsupported:
2592 # single 'ff...' match
2604 # single 'ff...' match
2593 return True
2605 return True
2594 if matchednode is None:
2606 if matchednode is None:
2595 raise error.LookupError(node, self.display_id, _(b'no node'))
2607 raise error.LookupError(node, self.display_id, _(b'no node'))
2596 return True
2608 return True
2597
2609
2598 def maybewdir(prefix):
2610 def maybewdir(prefix):
2599 return all(c == b'f' for c in pycompat.iterbytestr(prefix))
2611 return all(c == b'f' for c in pycompat.iterbytestr(prefix))
2600
2612
2601 hexnode = hex(node)
2613 hexnode = hex(node)
2602
2614
2603 def disambiguate(hexnode, minlength):
2615 def disambiguate(hexnode, minlength):
2604 """Disambiguate against wdirid."""
2616 """Disambiguate against wdirid."""
2605 for length in range(minlength, len(hexnode) + 1):
2617 for length in range(minlength, len(hexnode) + 1):
2606 prefix = hexnode[:length]
2618 prefix = hexnode[:length]
2607 if not maybewdir(prefix):
2619 if not maybewdir(prefix):
2608 return prefix
2620 return prefix
2609
2621
2610 if not getattr(self, 'filteredrevs', None):
2622 if not getattr(self, 'filteredrevs', None):
2611 try:
2623 try:
2612 length = max(self.index.shortest(node), minlength)
2624 length = max(self.index.shortest(node), minlength)
2613 return disambiguate(hexnode, length)
2625 return disambiguate(hexnode, length)
2614 except error.RevlogError:
2626 except error.RevlogError:
2615 if node != self.nodeconstants.wdirid:
2627 if node != self.nodeconstants.wdirid:
2616 raise error.LookupError(
2628 raise error.LookupError(
2617 node, self.display_id, _(b'no node')
2629 node, self.display_id, _(b'no node')
2618 )
2630 )
2619 except AttributeError:
2631 except AttributeError:
2620 # Fall through to pure code
2632 # Fall through to pure code
2621 pass
2633 pass
2622
2634
2623 if node == self.nodeconstants.wdirid:
2635 if node == self.nodeconstants.wdirid:
2624 for length in range(minlength, len(hexnode) + 1):
2636 for length in range(minlength, len(hexnode) + 1):
2625 prefix = hexnode[:length]
2637 prefix = hexnode[:length]
2626 if isvalid(prefix):
2638 if isvalid(prefix):
2627 return prefix
2639 return prefix
2628
2640
2629 for length in range(minlength, len(hexnode) + 1):
2641 for length in range(minlength, len(hexnode) + 1):
2630 prefix = hexnode[:length]
2642 prefix = hexnode[:length]
2631 if isvalid(prefix):
2643 if isvalid(prefix):
2632 return disambiguate(hexnode, length)
2644 return disambiguate(hexnode, length)
2633
2645
2634 def cmp(self, node, text):
2646 def cmp(self, node, text):
2635 """compare text with a given file revision
2647 """compare text with a given file revision
2636
2648
2637 returns True if text is different from what is stored.
2649 returns True if text is different from what is stored.
2638 """
2650 """
2639 p1, p2 = self.parents(node)
2651 p1, p2 = self.parents(node)
2640 return storageutil.hashrevisionsha1(text, p1, p2) != node
2652 return storageutil.hashrevisionsha1(text, p1, p2) != node
2641
2653
2642 def deltaparent(self, rev):
2654 def deltaparent(self, rev):
2643 """return deltaparent of the given revision"""
2655 """return deltaparent of the given revision"""
2644 base = self.index[rev][3]
2656 base = self.index[rev][3]
2645 if base == rev:
2657 if base == rev:
2646 return nullrev
2658 return nullrev
2647 elif self.delta_config.general_delta:
2659 elif self.delta_config.general_delta:
2648 return base
2660 return base
2649 else:
2661 else:
2650 return rev - 1
2662 return rev - 1
2651
2663
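# --- illustrative sketch (not part of revlog.py) ---
# ``deltaparent`` encodes two storage generations: with general delta the
# base revision is whatever the index recorded, while legacy revlogs
# always delta against the previous revision; a base equal to the
# revision itself marks a full snapshot.  A toy decision table
# (``delta_parent`` and its arguments are invented for this sketch):

NULLREV = -1

def delta_parent(stored_base, rev, general_delta):
    if stored_base == rev:          # stored as a full snapshot
        return NULLREV
    if general_delta:
        return stored_base          # arbitrary earlier revision
    return rev - 1                  # legacy: always the previous rev

assert delta_parent(stored_base=7, rev=7, general_delta=True) == NULLREV
assert delta_parent(stored_base=3, rev=7, general_delta=True) == 3
assert delta_parent(stored_base=3, rev=7, general_delta=False) == 6
# --- end of sketch ---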
2652 def issnapshot(self, rev):
2664 def issnapshot(self, rev):
2653 """tells whether rev is a snapshot"""
2665 """tells whether rev is a snapshot"""
2654 ret = self._inner.issnapshot(rev)
2666 ret = self._inner.issnapshot(rev)
2655 self.issnapshot = self._inner.issnapshot
2667 self.issnapshot = self._inner.issnapshot
2656 return ret
2668 return ret
2657
2669
2658 def snapshotdepth(self, rev):
2670 def snapshotdepth(self, rev):
2659 """number of snapshot in the chain before this one"""
2671 """number of snapshot in the chain before this one"""
2660 if not self.issnapshot(rev):
2672 if not self.issnapshot(rev):
2661 raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
2673 raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
2662 return len(self._inner._deltachain(rev)[0]) - 1
2674 return len(self._inner._deltachain(rev)[0]) - 1
2663
2675
2664 def revdiff(self, rev1, rev2):
2676 def revdiff(self, rev1, rev2):
2665 """return or calculate a delta between two revisions
2677 """return or calculate a delta between two revisions
2666
2678
2667 The delta calculated is in binary form and is intended to be written to
2679 The delta calculated is in binary form and is intended to be written to
2668 revlog data directly. So this function needs raw revision data.
2680 revlog data directly. So this function needs raw revision data.
2669 """
2681 """
2670 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
2682 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
2671 return bytes(self._inner._chunk(rev2))
2683 return bytes(self._inner._chunk(rev2))
2672
2684
2673 return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))
2685 return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))
2674
2686
2675 def revision(self, nodeorrev):
2687 def revision(self, nodeorrev):
2676 """return an uncompressed revision of a given node or revision
2688 """return an uncompressed revision of a given node or revision
2677 number.
2689 number.
2678 """
2690 """
2679 return self._revisiondata(nodeorrev)
2691 return self._revisiondata(nodeorrev)
2680
2692
2681 def sidedata(self, nodeorrev):
2693 def sidedata(self, nodeorrev):
2682 """a map of extra data related to the changeset but not part of the hash
2694 """a map of extra data related to the changeset but not part of the hash
2683
2695
2684 This function currently returns a dictionary. However, a more advanced
2696 This function currently returns a dictionary. However, a more advanced
2685 mapping object will likely be used in the future for more
2697 mapping object will likely be used in the future for more
2686 efficient/lazy code.
2698 efficient/lazy code.
2687 """
2699 """
2688 # deal with <nodeorrev> argument type
2700 # deal with <nodeorrev> argument type
2689 if isinstance(nodeorrev, int):
2701 if isinstance(nodeorrev, int):
2690 rev = nodeorrev
2702 rev = nodeorrev
2691 else:
2703 else:
2692 rev = self.rev(nodeorrev)
2704 rev = self.rev(nodeorrev)
2693 return self._sidedata(rev)
2705 return self._sidedata(rev)
2694
2706
2695 def _rawtext(self, node, rev):
2707 def _rawtext(self, node, rev):
2696 """return the possibly unvalidated rawtext for a revision
2708 """return the possibly unvalidated rawtext for a revision
2697
2709
2698 returns (rev, rawtext, validated)
2710 returns (rev, rawtext, validated)
2699 """
2711 """
2700 # Check if we have the entry in cache
2712 # Check if we have the entry in cache
2701 # The cache entry looks like (node, rev, rawtext)
2713 # The cache entry looks like (node, rev, rawtext)
2702 if self._inner._revisioncache:
2714 if self._inner._revisioncache:
2703 if self._inner._revisioncache[0] == node:
2715 if self._inner._revisioncache[0] == node:
2704 return (rev, self._inner._revisioncache[2], True)
2716 return (rev, self._inner._revisioncache[2], True)
2705
2717
2706 if rev is None:
2718 if rev is None:
2707 rev = self.rev(node)
2719 rev = self.rev(node)
2708
2720
2709 return self._inner.raw_text(node, rev)
2721 return self._inner.raw_text(node, rev)
2710
2722
2711 def _revisiondata(self, nodeorrev, raw=False):
2723 def _revisiondata(self, nodeorrev, raw=False):
2712 # deal with <nodeorrev> argument type
2724 # deal with <nodeorrev> argument type
2713 if isinstance(nodeorrev, int):
2725 if isinstance(nodeorrev, int):
2714 rev = nodeorrev
2726 rev = nodeorrev
2715 node = self.node(rev)
2727 node = self.node(rev)
2716 else:
2728 else:
2717 node = nodeorrev
2729 node = nodeorrev
2718 rev = None
2730 rev = None
2719
2731
2720 # fast path the special `nullid` rev
2732 # fast path the special `nullid` rev
2721 if node == self.nullid:
2733 if node == self.nullid:
2722 return b""
2734 return b""
2723
2735
2724 # ``rawtext`` is the text as stored inside the revlog. Might be the
2736 # ``rawtext`` is the text as stored inside the revlog. Might be the
2725 # revision or might need to be processed to retrieve the revision.
2737 # revision or might need to be processed to retrieve the revision.
2726 rev, rawtext, validated = self._rawtext(node, rev)
2738 rev, rawtext, validated = self._rawtext(node, rev)
2727
2739
2728 if raw and validated:
2740 if raw and validated:
2729 # if we don't want to process the raw text and that raw
2741 # if we don't want to process the raw text and that raw
2730 # text is cached, we can exit early.
2742 # text is cached, we can exit early.
2731 return rawtext
2743 return rawtext
2732 if rev is None:
2744 if rev is None:
2733 rev = self.rev(node)
2745 rev = self.rev(node)
2734 # the revlog's flags for this revision
2746 # the revlog's flags for this revision
2735 # (they usually alter its state or content)
2747 # (they usually alter its state or content)
2736 flags = self.flags(rev)
2748 flags = self.flags(rev)
2737
2749
2738 if validated and flags == REVIDX_DEFAULT_FLAGS:
2750 if validated and flags == REVIDX_DEFAULT_FLAGS:
2739 # no extra flags set, no flag processor runs, text = rawtext
2751 # no extra flags set, no flag processor runs, text = rawtext
2740 return rawtext
2752 return rawtext
2741
2753
2742 if raw:
2754 if raw:
2743 validatehash = flagutil.processflagsraw(self, rawtext, flags)
2755 validatehash = flagutil.processflagsraw(self, rawtext, flags)
2744 text = rawtext
2756 text = rawtext
2745 else:
2757 else:
2746 r = flagutil.processflagsread(self, rawtext, flags)
2758 r = flagutil.processflagsread(self, rawtext, flags)
2747 text, validatehash = r
2759 text, validatehash = r
2748 if validatehash:
2760 if validatehash:
2749 self.checkhash(text, node, rev=rev)
2761 self.checkhash(text, node, rev=rev)
2750 if not validated:
2762 if not validated:
2751 self._inner._revisioncache = (node, rev, rawtext)
2763 self._inner._revisioncache = (node, rev, rawtext)
2752
2764
2753 return text
2765 return text
2754
2766
2755 def _sidedata(self, rev):
2767 def _sidedata(self, rev):
2756 """Return the sidedata for a given revision number."""
2768 """Return the sidedata for a given revision number."""
2757 sidedata_end = None
2769 sidedata_end = None
2758 if self._docket is not None:
2770 if self._docket is not None:
2759 sidedata_end = self._docket.sidedata_end
2771 sidedata_end = self._docket.sidedata_end
2760 return self._inner.sidedata(rev, sidedata_end)
2772 return self._inner.sidedata(rev, sidedata_end)
2761
2773
2762 def rawdata(self, nodeorrev):
2774 def rawdata(self, nodeorrev):
2763 """return an uncompressed raw data of a given node or revision number."""
2775 """return an uncompressed raw data of a given node or revision number."""
2764 return self._revisiondata(nodeorrev, raw=True)
2776 return self._revisiondata(nodeorrev, raw=True)
2765
2777
2766 def hash(self, text, p1, p2):
2778 def hash(self, text, p1, p2):
2767 """Compute a node hash.
2779 """Compute a node hash.
2768
2780
2769 Available as a function so that subclasses can replace the hash
2781 Available as a function so that subclasses can replace the hash
2770 as needed.
2782 as needed.
2771 """
2783 """
2772 return storageutil.hashrevisionsha1(text, p1, p2)
2784 return storageutil.hashrevisionsha1(text, p1, p2)
2773
2785
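# --- illustrative sketch (not part of revlog.py) ---
# ``storageutil.hashrevisionsha1`` hashes the two parent nodes (sorted so
# that parent order does not matter) followed by the revision text; the
# node id is that SHA-1 digest, which is what ``cmp`` and ``checkhash``
# recompute and compare.  A self-contained approximation of that scheme,
# with invented names (``toy_hashrevision`` is not the real helper):
import hashlib

def toy_hashrevision(text, p1, p2):
    a, b = sorted((p1, p2))
    s = hashlib.sha1(a)
    s.update(b)
    s.update(text)
    return s.digest()

nullid = b'\0' * 20
node = toy_hashrevision(b'file content\n', nullid, nullid)
assert len(node) == 20
# parent order must not change the result
assert toy_hashrevision(b'x', b'\1' * 20, b'\2' * 20) == \
       toy_hashrevision(b'x', b'\2' * 20, b'\1' * 20)
# --- end of sketch ---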
2774 def checkhash(self, text, node, p1=None, p2=None, rev=None):
2786 def checkhash(self, text, node, p1=None, p2=None, rev=None):
2775 """Check node hash integrity.
2787 """Check node hash integrity.
2776
2788
2777 Available as a function so that subclasses can extend hash mismatch
2789 Available as a function so that subclasses can extend hash mismatch
2778 behaviors as needed.
2790 behaviors as needed.
2779 """
2791 """
2780 try:
2792 try:
2781 if p1 is None and p2 is None:
2793 if p1 is None and p2 is None:
2782 p1, p2 = self.parents(node)
2794 p1, p2 = self.parents(node)
2783 if node != self.hash(text, p1, p2):
2795 if node != self.hash(text, p1, p2):
2784 # Clear the revision cache on hash failure. The revision cache
2796 # Clear the revision cache on hash failure. The revision cache
2785 # only stores the raw revision and clearing the cache does have
2797 # only stores the raw revision and clearing the cache does have
2786 # the side-effect that we won't have a cache hit when the raw
2798 # the side-effect that we won't have a cache hit when the raw
2787 # revision data is accessed. But this case should be rare and
2799 # revision data is accessed. But this case should be rare and
2788 # it is extra work to teach the cache about the hash
2800 # it is extra work to teach the cache about the hash
2789 # verification state.
2801 # verification state.
2790 if (
2802 if (
2791 self._inner._revisioncache
2803 self._inner._revisioncache
2792 and self._inner._revisioncache[0] == node
2804 and self._inner._revisioncache[0] == node
2793 ):
2805 ):
2794 self._inner._revisioncache = None
2806 self._inner._revisioncache = None
2795
2807
2796 revornode = rev
2808 revornode = rev
2797 if revornode is None:
2809 if revornode is None:
2798 revornode = templatefilters.short(hex(node))
2810 revornode = templatefilters.short(hex(node))
2799 raise error.RevlogError(
2811 raise error.RevlogError(
2800 _(b"integrity check failed on %s:%s")
2812 _(b"integrity check failed on %s:%s")
2801 % (self.display_id, pycompat.bytestr(revornode))
2813 % (self.display_id, pycompat.bytestr(revornode))
2802 )
2814 )
2803 except error.RevlogError:
2815 except error.RevlogError:
2804 if self.feature_config.censorable and storageutil.iscensoredtext(
2816 if self.feature_config.censorable and storageutil.iscensoredtext(
2805 text
2817 text
2806 ):
2818 ):
2807 raise error.CensoredNodeError(self.display_id, node, text)
2819 raise error.CensoredNodeError(self.display_id, node, text)
2808 raise
2820 raise
2809
2821
2810 @property
2822 @property
2811 def _split_index_file(self):
2823 def _split_index_file(self):
2812 """the path where to expect the index of an ongoing splitting operation
2824 """the path where to expect the index of an ongoing splitting operation
2813
2825
2814 The file will only exist if a splitting operation is in progress, but
2826 The file will only exist if a splitting operation is in progress, but
2815 it is always expected at the same location."""
2827 it is always expected at the same location."""
2816 parts = self.radix.split(b'/')
2828 parts = self.radix.split(b'/')
2817 if len(parts) > 1:
2829 if len(parts) > 1:
2818 # adds a '-s' suffix to the `data/` or `meta/` base directory
2830 # adds a '-s' suffix to the `data/` or `meta/` base directory
2819 head = parts[0] + b'-s'
2831 head = parts[0] + b'-s'
2820 mids = parts[1:-1]
2832 mids = parts[1:-1]
2821 tail = parts[-1] + b'.i'
2833 tail = parts[-1] + b'.i'
2822 pieces = [head] + mids + [tail]
2834 pieces = [head] + mids + [tail]
2823 return b'/'.join(pieces)
2835 return b'/'.join(pieces)
2824 else:
2836 else:
2825 # the revlog is stored at the root of the store (changelog or
2837 # the revlog is stored at the root of the store (changelog or
2826 # manifest), no risk of collision.
2838 # manifest), no risk of collision.
2827 return self.radix + b'.i.s'
2839 return self.radix + b'.i.s'
2828
2840
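# --- illustrative sketch (not part of revlog.py) ---
# The split-index path is derived purely from the revlog's radix: revlogs
# that live under ``data/`` or ``meta/`` get a sibling ``data-s/`` /
# ``meta-s/`` tree (avoiding collisions with real tracked files), while
# revlogs at the store root just gain a ``.i.s`` suffix.  A standalone
# reimplementation of that mapping, for illustration only:

def split_index_path(radix):
    parts = radix.split(b'/')
    if len(parts) > 1:
        return b'/'.join([parts[0] + b'-s'] + parts[1:-1] + [parts[-1] + b'.i'])
    return radix + b'.i.s'

assert split_index_path(b'data/foo.txt') == b'data-s/foo.txt.i'
assert split_index_path(b'00changelog') == b'00changelog.i.s'
# --- end of sketch ---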
2829 def _enforceinlinesize(self, tr):
2841 def _enforceinlinesize(self, tr):
2830 """Check if the revlog is too big for inline and convert if so.
2842 """Check if the revlog is too big for inline and convert if so.
2831
2843
2832 This should be called after revisions are added to the revlog. If the
2844 This should be called after revisions are added to the revlog. If the
2833 revlog has grown too large to be an inline revlog, it will convert it
2845 revlog has grown too large to be an inline revlog, it will convert it
2834 to use multiple index and data files.
2846 to use multiple index and data files.
2835 """
2847 """
2836 tiprev = len(self) - 1
2848 tiprev = len(self) - 1
2837 total_size = self.start(tiprev) + self.length(tiprev)
2849 total_size = self.start(tiprev) + self.length(tiprev)
2838 if not self._inline or (self._may_inline and total_size < _maxinline):
2850 if not self._inline or (self._may_inline and total_size < _maxinline):
2839 return
2851 return
2840
2852
2841 if self._docket is not None:
2853 if self._docket is not None:
2842 msg = b"inline revlog should not have a docket"
2854 msg = b"inline revlog should not have a docket"
2843 raise error.ProgrammingError(msg)
2855 raise error.ProgrammingError(msg)
2844
2856
2845 # In the common case, we enforce inline size because the revlog has
2857 # In the common case, we enforce inline size because the revlog has
2846 # been appended to. In that case, it must have an initial offset
2858 # been appended to. In that case, it must have an initial offset
2847 # recorded in the transaction.
2859 # recorded in the transaction.
2848 troffset = tr.findoffset(self._inner.canonical_index_file)
2860 troffset = tr.findoffset(self._inner.canonical_index_file)
2849 pre_touched = troffset is not None
2861 pre_touched = troffset is not None
2850 if not pre_touched and self.target[0] != KIND_CHANGELOG:
2862 if not pre_touched and self.target[0] != KIND_CHANGELOG:
2851 raise error.RevlogError(
2863 raise error.RevlogError(
2852 _(b"%s not found in the transaction") % self._indexfile
2864 _(b"%s not found in the transaction") % self._indexfile
2853 )
2865 )
2854
2866
2855 tr.addbackup(self._inner.canonical_index_file, for_offset=pre_touched)
2867 tr.addbackup(self._inner.canonical_index_file, for_offset=pre_touched)
2856 tr.add(self._datafile, 0)
2868 tr.add(self._datafile, 0)
2857
2869
2858 new_index_file_path = None
2870 new_index_file_path = None
2859 old_index_file_path = self._indexfile
2871 old_index_file_path = self._indexfile
2860 new_index_file_path = self._split_index_file
2872 new_index_file_path = self._split_index_file
2861 opener = self.opener
2873 opener = self.opener
2862 weak_self = weakref.ref(self)
2874 weak_self = weakref.ref(self)
2863
2875
2864 # the "split" index replace the real index when the transaction is
2876 # the "split" index replace the real index when the transaction is
2865 # finalized
2877 # finalized
2866 def finalize_callback(tr):
2878 def finalize_callback(tr):
2867 opener.rename(
2879 opener.rename(
2868 new_index_file_path,
2880 new_index_file_path,
2869 old_index_file_path,
2881 old_index_file_path,
2870 checkambig=True,
2882 checkambig=True,
2871 )
2883 )
2872 maybe_self = weak_self()
2884 maybe_self = weak_self()
2873 if maybe_self is not None:
2885 if maybe_self is not None:
2874 maybe_self._indexfile = old_index_file_path
2886 maybe_self._indexfile = old_index_file_path
2875 maybe_self._inner.index_file = maybe_self._indexfile
2887 maybe_self._inner.index_file = maybe_self._indexfile
2876
2888
2877 def abort_callback(tr):
2889 def abort_callback(tr):
2878 maybe_self = weak_self()
2890 maybe_self = weak_self()
2879 if maybe_self is not None:
2891 if maybe_self is not None:
2880 maybe_self._indexfile = old_index_file_path
2892 maybe_self._indexfile = old_index_file_path
2881 maybe_self._inner.inline = True
2893 maybe_self._inner.inline = True
2882 maybe_self._inner.index_file = old_index_file_path
2894 maybe_self._inner.index_file = old_index_file_path
2883
2895
2884 tr.registertmp(new_index_file_path)
2896 tr.registertmp(new_index_file_path)
2885 if self.target[1] is not None:
2897 if self.target[1] is not None:
2886 callback_id = b'000-revlog-split-%d-%s' % self.target
2898 callback_id = b'000-revlog-split-%d-%s' % self.target
2887 else:
2899 else:
2888 callback_id = b'000-revlog-split-%d' % self.target[0]
2900 callback_id = b'000-revlog-split-%d' % self.target[0]
2889 tr.addfinalize(callback_id, finalize_callback)
2901 tr.addfinalize(callback_id, finalize_callback)
2890 tr.addabort(callback_id, abort_callback)
2902 tr.addabort(callback_id, abort_callback)
2891
2903
2892 self._format_flags &= ~FLAG_INLINE_DATA
2904 self._format_flags &= ~FLAG_INLINE_DATA
2893 self._inner.split_inline(
2905 self._inner.split_inline(
2894 tr,
2906 tr,
2895 self._format_flags | self._format_version,
2907 self._format_flags | self._format_version,
2896 new_index_file_path=new_index_file_path,
2908 new_index_file_path=new_index_file_path,
2897 )
2909 )
2898
2910
2899 self._inline = False
2911 self._inline = False
2900 if new_index_file_path is not None:
2912 if new_index_file_path is not None:
2901 self._indexfile = new_index_file_path
2913 self._indexfile = new_index_file_path
2902
2914
2903 nodemaputil.setup_persistent_nodemap(tr, self)
2915 nodemaputil.setup_persistent_nodemap(tr, self)
2904
2916
2905 def _nodeduplicatecallback(self, transaction, node):
2917 def _nodeduplicatecallback(self, transaction, node):
2906 """called when trying to add a node already stored."""
2918 """called when trying to add a node already stored."""
2907
2919
2908 @contextlib.contextmanager
2920 @contextlib.contextmanager
2909 def reading(self):
2921 def reading(self):
2910 with self._inner.reading():
2922 with self._inner.reading():
2911 yield
2923 yield
2912
2924
2913 @contextlib.contextmanager
2925 @contextlib.contextmanager
2914 def _writing(self, transaction):
2926 def _writing(self, transaction):
2915 if self._trypending:
2927 if self._trypending:
2916 msg = b'try to write in a `trypending` revlog: %s'
2928 msg = b'try to write in a `trypending` revlog: %s'
2917 msg %= self.display_id
2929 msg %= self.display_id
2918 raise error.ProgrammingError(msg)
2930 raise error.ProgrammingError(msg)
2919 if self._inner.is_writing:
2931 if self._inner.is_writing:
2920 yield
2932 yield
2921 else:
2933 else:
2922 data_end = None
2934 data_end = None
2923 sidedata_end = None
2935 sidedata_end = None
2924 if self._docket is not None:
2936 if self._docket is not None:
2925 data_end = self._docket.data_end
2937 data_end = self._docket.data_end
2926 sidedata_end = self._docket.sidedata_end
2938 sidedata_end = self._docket.sidedata_end
2927 with self._inner.writing(
2939 with self._inner.writing(
2928 transaction,
2940 transaction,
2929 data_end=data_end,
2941 data_end=data_end,
2930 sidedata_end=sidedata_end,
2942 sidedata_end=sidedata_end,
2931 ):
2943 ):
2932 yield
2944 yield
2933 if self._docket is not None:
2945 if self._docket is not None:
2934 self._write_docket(transaction)
2946 self._write_docket(transaction)
2935
2947
2936 @property
2948 @property
2937 def is_delaying(self):
2949 def is_delaying(self):
2938 return self._inner.is_delaying
2950 return self._inner.is_delaying
2939
2951
2940 def _write_docket(self, transaction):
2952 def _write_docket(self, transaction):
2941 """write the current docket on disk
2953 """write the current docket on disk
2942
2954
2943 Exists as a method to help the changelog implement its transaction logic
2955 Exists as a method to help the changelog implement its transaction logic
2944
2956
2945 We could also imagine using the same transaction logic for all revlogs
2957 We could also imagine using the same transaction logic for all revlogs
2946 since dockets are cheap."""
2958 since dockets are cheap."""
2947 self._docket.write(transaction)
2959 self._docket.write(transaction)
2948
2960
2949 def addrevision(
2961 def addrevision(
2950 self,
2962 self,
2951 text,
2963 text,
2952 transaction,
2964 transaction,
2953 link,
2965 link,
2954 p1,
2966 p1,
2955 p2,
2967 p2,
2956 cachedelta=None,
2968 cachedelta=None,
2957 node=None,
2969 node=None,
2958 flags=REVIDX_DEFAULT_FLAGS,
2970 flags=REVIDX_DEFAULT_FLAGS,
2959 deltacomputer=None,
2971 deltacomputer=None,
2960 sidedata=None,
2972 sidedata=None,
2961 ):
2973 ):
2962 """add a revision to the log
2974 """add a revision to the log
2963
2975
2964 text - the revision data to add
2976 text - the revision data to add
2965 transaction - the transaction object used for rollback
2977 transaction - the transaction object used for rollback
2966 link - the linkrev data to add
2978 link - the linkrev data to add
2967 p1, p2 - the parent nodeids of the revision
2979 p1, p2 - the parent nodeids of the revision
2968 cachedelta - an optional precomputed delta
2980 cachedelta - an optional precomputed delta
2969 node - nodeid of revision; typically node is not specified, and it is
2981 node - nodeid of revision; typically node is not specified, and it is
2970 computed by default as hash(text, p1, p2); however, subclasses might
2982 computed by default as hash(text, p1, p2); however, subclasses might
2971 use a different hashing method (and override checkhash() in that case)
2983 use a different hashing method (and override checkhash() in that case)
2972 flags - the known flags to set on the revision
2984 flags - the known flags to set on the revision
2973 deltacomputer - an optional deltacomputer instance shared between
2985 deltacomputer - an optional deltacomputer instance shared between
2974 multiple calls
2986 multiple calls
2975 """
2987 """
2976 if link == nullrev:
2988 if link == nullrev:
2977 raise error.RevlogError(
2989 raise error.RevlogError(
2978 _(b"attempted to add linkrev -1 to %s") % self.display_id
2990 _(b"attempted to add linkrev -1 to %s") % self.display_id
2979 )
2991 )
2980
2992
2981 if sidedata is None:
2993 if sidedata is None:
2982 sidedata = {}
2994 sidedata = {}
2983 elif sidedata and not self.feature_config.has_side_data:
2995 elif sidedata and not self.feature_config.has_side_data:
2984 raise error.ProgrammingError(
2996 raise error.ProgrammingError(
2985 _(b"trying to add sidedata to a revlog that does not support them")
2997 _(b"trying to add sidedata to a revlog that does not support them")
2986 )
2998 )
2987
2999
2988 if flags:
3000 if flags:
2989 node = node or self.hash(text, p1, p2)
3001 node = node or self.hash(text, p1, p2)
2990
3002
2991 rawtext, validatehash = flagutil.processflagswrite(self, text, flags)
3003 rawtext, validatehash = flagutil.processflagswrite(self, text, flags)
2992
3004
2993 # If the flag processor modifies the revision data, ignore any provided
3005 # If the flag processor modifies the revision data, ignore any provided
2994 # cachedelta.
3006 # cachedelta.
2995 if rawtext != text:
3007 if rawtext != text:
2996 cachedelta = None
3008 cachedelta = None
2997
3009
2998 if len(rawtext) > _maxentrysize:
3010 if len(rawtext) > _maxentrysize:
2999 raise error.RevlogError(
3011 raise error.RevlogError(
3000 _(
3012 _(
3001 b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
3013 b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
3002 )
3014 )
3003 % (self.display_id, len(rawtext))
3015 % (self.display_id, len(rawtext))
3004 )
3016 )
3005
3017
3006 node = node or self.hash(rawtext, p1, p2)
3018 node = node or self.hash(rawtext, p1, p2)
3007 rev = self.index.get_rev(node)
3019 rev = self.index.get_rev(node)
3008 if rev is not None:
3020 if rev is not None:
3009 return rev
3021 return rev
3010
3022
3011 if validatehash:
3023 if validatehash:
3012 self.checkhash(rawtext, node, p1=p1, p2=p2)
3024 self.checkhash(rawtext, node, p1=p1, p2=p2)
3013
3025
3014 return self.addrawrevision(
3026 return self.addrawrevision(
3015 rawtext,
3027 rawtext,
3016 transaction,
3028 transaction,
3017 link,
3029 link,
3018 p1,
3030 p1,
3019 p2,
3031 p2,
3020 node,
3032 node,
3021 flags,
3033 flags,
3022 cachedelta=cachedelta,
3034 cachedelta=cachedelta,
3023 deltacomputer=deltacomputer,
3035 deltacomputer=deltacomputer,
3024 sidedata=sidedata,
3036 sidedata=sidedata,
3025 )
3037 )
3026
3038
3027 def addrawrevision(
3039 def addrawrevision(
3028 self,
3040 self,
3029 rawtext,
3041 rawtext,
3030 transaction,
3042 transaction,
3031 link,
3043 link,
3032 p1,
3044 p1,
3033 p2,
3045 p2,
3034 node,
3046 node,
3035 flags,
3047 flags,
3036 cachedelta=None,
3048 cachedelta=None,
3037 deltacomputer=None,
3049 deltacomputer=None,
3038 sidedata=None,
3050 sidedata=None,
3039 ):
3051 ):
3040 """add a raw revision with known flags, node and parents
3052 """add a raw revision with known flags, node and parents
3041 useful when reusing a revision not stored in this revlog (e.g. received
3053 useful when reusing a revision not stored in this revlog (e.g. received
3042 over the wire, or read from an external bundle).
3054 over the wire, or read from an external bundle).
3043 """
3055 """
3044 with self._writing(transaction):
3056 with self._writing(transaction):
3045 return self._addrevision(
3057 return self._addrevision(
3046 node,
3058 node,
3047 rawtext,
3059 rawtext,
3048 transaction,
3060 transaction,
3049 link,
3061 link,
3050 p1,
3062 p1,
3051 p2,
3063 p2,
3052 flags,
3064 flags,
3053 cachedelta,
3065 cachedelta,
3054 deltacomputer=deltacomputer,
3066 deltacomputer=deltacomputer,
3055 sidedata=sidedata,
3067 sidedata=sidedata,
3056 )
3068 )
3057
3069
3058 def compress(self, data):
3070 def compress(self, data):
3059 return self._inner.compress(data)
3071 return self._inner.compress(data)
3060
3072
3061 def decompress(self, data):
3073 def decompress(self, data):
3062 return self._inner.decompress(data)
3074 return self._inner.decompress(data)
3063
3075
3064 def _addrevision(
3076 def _addrevision(
3065 self,
3077 self,
3066 node,
3078 node,
3067 rawtext,
3079 rawtext,
3068 transaction,
3080 transaction,
3069 link,
3081 link,
3070 p1,
3082 p1,
3071 p2,
3083 p2,
3072 flags,
3084 flags,
3073 cachedelta,
3085 cachedelta,
3074 alwayscache=False,
3086 alwayscache=False,
3075 deltacomputer=None,
3087 deltacomputer=None,
3076 sidedata=None,
3088 sidedata=None,
3077 ):
3089 ):
3078 """internal function to add revisions to the log
3090 """internal function to add revisions to the log
3079
3091
3080 see addrevision for argument descriptions.
3092 see addrevision for argument descriptions.
3081
3093
3082 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
3094 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
3083
3095
3084 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
3096 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
3085 be used.
3097 be used.
3086
3098
3087 invariants:
3099 invariants:
3088 - rawtext is optional (can be None); if not set, cachedelta must be set.
3100 - rawtext is optional (can be None); if not set, cachedelta must be set.
3089 if both are set, they must correspond to each other.
3101 if both are set, they must correspond to each other.
3090 """
3102 """
3091 if node == self.nullid:
3103 if node == self.nullid:
3092 raise error.RevlogError(
3104 raise error.RevlogError(
3093 _(b"%s: attempt to add null revision") % self.display_id
3105 _(b"%s: attempt to add null revision") % self.display_id
3094 )
3106 )
3095 if (
3107 if (
3096 node == self.nodeconstants.wdirid
3108 node == self.nodeconstants.wdirid
3097 or node in self.nodeconstants.wdirfilenodeids
3109 or node in self.nodeconstants.wdirfilenodeids
3098 ):
3110 ):
3099 raise error.RevlogError(
3111 raise error.RevlogError(
3100 _(b"%s: attempt to add wdir revision") % self.display_id
3112 _(b"%s: attempt to add wdir revision") % self.display_id
3101 )
3113 )
3102 if self._inner._writinghandles is None:
3114 if self._inner._writinghandles is None:
3103 msg = b'adding revision outside `revlog._writing` context'
3115 msg = b'adding revision outside `revlog._writing` context'
3104 raise error.ProgrammingError(msg)
3116 raise error.ProgrammingError(msg)
3105
3117
3106 btext = [rawtext]
3118 btext = [rawtext]
3107
3119
3108 curr = len(self)
3120 curr = len(self)
3109 prev = curr - 1
3121 prev = curr - 1
3110
3122
3111 offset = self._get_data_offset(prev)
3123 offset = self._get_data_offset(prev)
3112
3124
3113 if self._concurrencychecker:
3125 if self._concurrencychecker:
3114 ifh, dfh, sdfh = self._inner._writinghandles
3126 ifh, dfh, sdfh = self._inner._writinghandles
3115 # XXX no checking for the sidedata file
3127 # XXX no checking for the sidedata file
3116 if self._inline:
3128 if self._inline:
3117 # offset is "as if" it were in the .d file, so we need to add on
3129 # offset is "as if" it were in the .d file, so we need to add on
3118 # the size of the entry metadata.
3130 # the size of the entry metadata.
3119 self._concurrencychecker(
3131 self._concurrencychecker(
3120 ifh, self._indexfile, offset + curr * self.index.entry_size
3132 ifh, self._indexfile, offset + curr * self.index.entry_size
3121 )
3133 )
3122 else:
3134 else:
3123 # Entries in the .i are a consistent size.
3135 # Entries in the .i are a consistent size.
3124 self._concurrencychecker(
3136 self._concurrencychecker(
3125 ifh, self._indexfile, curr * self.index.entry_size
3137 ifh, self._indexfile, curr * self.index.entry_size
3126 )
3138 )
3127 self._concurrencychecker(dfh, self._datafile, offset)
3139 self._concurrencychecker(dfh, self._datafile, offset)
3128
3140
3129 p1r, p2r = self.rev(p1), self.rev(p2)
3141 p1r, p2r = self.rev(p1), self.rev(p2)
3130
3142
3131 # full versions are inserted when the needed deltas
3143 # full versions are inserted when the needed deltas
3132 # become comparable to the uncompressed text
3144 # become comparable to the uncompressed text
3133 if rawtext is None:
3145 if rawtext is None:
3134 # need the rawtext size, before it is changed by flag processors, which is
3146 # need the rawtext size, before it is changed by flag processors, which is
3135 # the non-raw size. Use revlog explicitly to avoid filelog's extra
3147 # the non-raw size. Use revlog explicitly to avoid filelog's extra
3136 # logic that might remove metadata size.
3148 # logic that might remove metadata size.
3137 textlen = mdiff.patchedsize(
3149 textlen = mdiff.patchedsize(
3138 revlog.size(self, cachedelta[0]), cachedelta[1]
3150 revlog.size(self, cachedelta[0]), cachedelta[1]
3139 )
3151 )
3140 else:
3152 else:
3141 textlen = len(rawtext)
3153 textlen = len(rawtext)
3142
3154
3143 if deltacomputer is None:
3155 if deltacomputer is None:
3144 write_debug = None
3156 write_debug = None
3145 if self.delta_config.debug_delta:
3157 if self.delta_config.debug_delta:
3146 write_debug = transaction._report
3158 write_debug = transaction._report
3147 deltacomputer = deltautil.deltacomputer(
3159 deltacomputer = deltautil.deltacomputer(
3148 self, write_debug=write_debug
3160 self, write_debug=write_debug
3149 )
3161 )
3150
3162
3151 if cachedelta is not None and len(cachedelta) == 2:
3163 if cachedelta is not None and len(cachedelta) == 2:
3152 # If the cached delta has no information about how it should be
3164 # If the cached delta has no information about how it should be
3153 # reused, add the default reuse instruction according to the
3165 # reused, add the default reuse instruction according to the
3154 # revlog's configuration.
3166 # revlog's configuration.
3155 if (
3167 if (
3156 self.delta_config.general_delta
3168 self.delta_config.general_delta
3157 and self.delta_config.lazy_delta_base
3169 and self.delta_config.lazy_delta_base
3158 ):
3170 ):
3159 delta_base_reuse = DELTA_BASE_REUSE_TRY
3171 delta_base_reuse = DELTA_BASE_REUSE_TRY
3160 else:
3172 else:
3161 delta_base_reuse = DELTA_BASE_REUSE_NO
3173 delta_base_reuse = DELTA_BASE_REUSE_NO
3162 cachedelta = (cachedelta[0], cachedelta[1], delta_base_reuse)
3174 cachedelta = (cachedelta[0], cachedelta[1], delta_base_reuse)
3163
3175
3164 revinfo = revlogutils.revisioninfo(
3176 revinfo = revlogutils.revisioninfo(
3165 node,
3177 node,
3166 p1,
3178 p1,
3167 p2,
3179 p2,
3168 btext,
3180 btext,
3169 textlen,
3181 textlen,
3170 cachedelta,
3182 cachedelta,
3171 flags,
3183 flags,
3172 )
3184 )
3173
3185
3174 deltainfo = deltacomputer.finddeltainfo(revinfo)
3186 deltainfo = deltacomputer.finddeltainfo(revinfo)
3175
3187
3176 compression_mode = COMP_MODE_INLINE
3188 compression_mode = COMP_MODE_INLINE
3177 if self._docket is not None:
3189 if self._docket is not None:
3178 default_comp = self._docket.default_compression_header
3190 default_comp = self._docket.default_compression_header
3179 r = deltautil.delta_compression(default_comp, deltainfo)
3191 r = deltautil.delta_compression(default_comp, deltainfo)
3180 compression_mode, deltainfo = r
3192 compression_mode, deltainfo = r
3181
3193
3182 sidedata_compression_mode = COMP_MODE_INLINE
3194 sidedata_compression_mode = COMP_MODE_INLINE
3183 if sidedata and self.feature_config.has_side_data:
3195 if sidedata and self.feature_config.has_side_data:
3184 sidedata_compression_mode = COMP_MODE_PLAIN
3196 sidedata_compression_mode = COMP_MODE_PLAIN
3185 serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
3197 serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
3186 sidedata_offset = self._docket.sidedata_end
3198 sidedata_offset = self._docket.sidedata_end
3187 h, comp_sidedata = self._inner.compress(serialized_sidedata)
3199 h, comp_sidedata = self._inner.compress(serialized_sidedata)
3188 if (
3200 if (
3189 h != b'u'
3201 h != b'u'
3190 and comp_sidedata[0:1] != b'\0'
3202 and comp_sidedata[0:1] != b'\0'
3191 and len(comp_sidedata) < len(serialized_sidedata)
3203 and len(comp_sidedata) < len(serialized_sidedata)
3192 ):
3204 ):
3193 assert not h
3205 assert not h
3194 if (
3206 if (
3195 comp_sidedata[0:1]
3207 comp_sidedata[0:1]
3196 == self._docket.default_compression_header
3208 == self._docket.default_compression_header
3197 ):
3209 ):
3198 sidedata_compression_mode = COMP_MODE_DEFAULT
3210 sidedata_compression_mode = COMP_MODE_DEFAULT
3199 serialized_sidedata = comp_sidedata
3211 serialized_sidedata = comp_sidedata
3200 else:
3212 else:
3201 sidedata_compression_mode = COMP_MODE_INLINE
3213 sidedata_compression_mode = COMP_MODE_INLINE
3202 serialized_sidedata = comp_sidedata
3214 serialized_sidedata = comp_sidedata
3203 else:
3215 else:
3204 serialized_sidedata = b""
3216 serialized_sidedata = b""
3205 # Don't store the offset if the sidedata is empty, that way
3217 # Don't store the offset if the sidedata is empty, that way
3206 # we can easily detect empty sidedata, and it will be no different
3218 # we can easily detect empty sidedata, and it will be no different
3207 # from sidedata we add manually.
3219 # from sidedata we add manually.
3208 sidedata_offset = 0
3220 sidedata_offset = 0
3209
3221
3210 rank = RANK_UNKNOWN
3222 rank = RANK_UNKNOWN
3211 if self.feature_config.compute_rank:
3223 if self.feature_config.compute_rank:
3212 if (p1r, p2r) == (nullrev, nullrev):
3224 if (p1r, p2r) == (nullrev, nullrev):
3213 rank = 1
3225 rank = 1
3214 elif p1r != nullrev and p2r == nullrev:
3226 elif p1r != nullrev and p2r == nullrev:
3215 rank = 1 + self.fast_rank(p1r)
3227 rank = 1 + self.fast_rank(p1r)
3216 elif p1r == nullrev and p2r != nullrev:
3228 elif p1r == nullrev and p2r != nullrev:
3217 rank = 1 + self.fast_rank(p2r)
3229 rank = 1 + self.fast_rank(p2r)
3218 else: # merge node
3230 else: # merge node
3219 if rustdagop is not None and self.index.rust_ext_compat:
3231 if rustdagop is not None and self.index.rust_ext_compat:
3220 rank = rustdagop.rank(self.index, p1r, p2r)
3232 rank = rustdagop.rank(self.index, p1r, p2r)
3221 else:
3233 else:
3222 pmin, pmax = sorted((p1r, p2r))
3234 pmin, pmax = sorted((p1r, p2r))
3223 rank = 1 + self.fast_rank(pmax)
3235 rank = 1 + self.fast_rank(pmax)
3224 rank += sum(1 for _ in self.findmissingrevs([pmax], [pmin]))
3236 rank += sum(1 for _ in self.findmissingrevs([pmax], [pmin]))
3225
3237
3226 e = revlogutils.entry(
3238 e = revlogutils.entry(
3227 flags=flags,
3239 flags=flags,
3228 data_offset=offset,
3240 data_offset=offset,
3229 data_compressed_length=deltainfo.deltalen,
3241 data_compressed_length=deltainfo.deltalen,
3230 data_uncompressed_length=textlen,
3242 data_uncompressed_length=textlen,
3231 data_compression_mode=compression_mode,
3243 data_compression_mode=compression_mode,
3232 data_delta_base=deltainfo.base,
3244 data_delta_base=deltainfo.base,
3233 link_rev=link,
3245 link_rev=link,
3234 parent_rev_1=p1r,
3246 parent_rev_1=p1r,
3235 parent_rev_2=p2r,
3247 parent_rev_2=p2r,
3236 node_id=node,
3248 node_id=node,
3237 sidedata_offset=sidedata_offset,
3249 sidedata_offset=sidedata_offset,
3238 sidedata_compressed_length=len(serialized_sidedata),
3250 sidedata_compressed_length=len(serialized_sidedata),
3239 sidedata_compression_mode=sidedata_compression_mode,
3251 sidedata_compression_mode=sidedata_compression_mode,
3240 rank=rank,
3252 rank=rank,
3241 )
3253 )
3242
3254
3243 self.index.append(e)
3255 self.index.append(e)
3244 entry = self.index.entry_binary(curr)
3256 entry = self.index.entry_binary(curr)
3245 if curr == 0 and self._docket is None:
3257 if curr == 0 and self._docket is None:
3246 header = self._format_flags | self._format_version
3258 header = self._format_flags | self._format_version
3247 header = self.index.pack_header(header)
3259 header = self.index.pack_header(header)
3248 entry = header + entry
3260 entry = header + entry
3249 self._writeentry(
3261 self._writeentry(
3250 transaction,
3262 transaction,
3251 entry,
3263 entry,
3252 deltainfo.data,
3264 deltainfo.data,
3253 link,
3265 link,
3254 offset,
3266 offset,
3255 serialized_sidedata,
3267 serialized_sidedata,
3256 sidedata_offset,
3268 sidedata_offset,
3257 )
3269 )
3258
3270
3259 rawtext = btext[0]
3271 rawtext = btext[0]
3260
3272
3261 if alwayscache and rawtext is None:
3273 if alwayscache and rawtext is None:
3262 rawtext = deltacomputer.buildtext(revinfo)
3274 rawtext = deltacomputer.buildtext(revinfo)
3263
3275
3264 if type(rawtext) == bytes: # only accept immutable objects
3276 if type(rawtext) == bytes: # only accept immutable objects
3265 self._inner._revisioncache = (node, curr, rawtext)
3277 self._inner._revisioncache = (node, curr, rawtext)
3266 self._chainbasecache[curr] = deltainfo.chainbase
3278 self._chainbasecache[curr] = deltainfo.chainbase
3267 return curr
3279 return curr
3268
3280
3269 def _get_data_offset(self, prev):
3281 def _get_data_offset(self, prev):
3270 """Returns the current offset in the (in-transaction) data file.
3282 """Returns the current offset in the (in-transaction) data file.
3271 Versions < 2 of the revlog can get this in O(1); revlog v2 needs a docket
3283 Versions < 2 of the revlog can get this in O(1); revlog v2 needs a docket
3272 file to store that information: since sidedata can be rewritten to the
3284 file to store that information: since sidedata can be rewritten to the
3273 end of the data file within a transaction, you can have cases where, for
3285 end of the data file within a transaction, you can have cases where, for
3274 example, rev `n` does not have sidedata while rev `n - 1` does, leading
3286 example, rev `n` does not have sidedata while rev `n - 1` does, leading
3275 to `n - 1`'s sidedata being written after `n`'s data.
3287 to `n - 1`'s sidedata being written after `n`'s data.
3276
3288
3277 TODO cache this in a docket file before getting out of experimental."""
3289 TODO cache this in a docket file before getting out of experimental."""
3278 if self._docket is None:
3290 if self._docket is None:
3279 return self.end(prev)
3291 return self.end(prev)
3280 else:
3292 else:
3281 return self._docket.data_end
3293 return self._docket.data_end
3282
3294
3283 def _writeentry(
3295 def _writeentry(
3284 self,
3296 self,
3285 transaction,
3297 transaction,
3286 entry,
3298 entry,
3287 data,
3299 data,
3288 link,
3300 link,
3289 offset,
3301 offset,
3290 sidedata,
3302 sidedata,
3291 sidedata_offset,
3303 sidedata_offset,
3292 ):
3304 ):
3293 # Files opened in a+ mode have inconsistent behavior on various
3305 # Files opened in a+ mode have inconsistent behavior on various
3294 # platforms. Windows requires that a file positioning call be made
3306 # platforms. Windows requires that a file positioning call be made
3295 # when the file handle transitions between reads and writes. See
3307 # when the file handle transitions between reads and writes. See
3296 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
3308 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
3297 # platforms, Python or the platform itself can be buggy. Some versions
3309 # platforms, Python or the platform itself can be buggy. Some versions
3298 # of Solaris have been observed to not append at the end of the file
3310 # of Solaris have been observed to not append at the end of the file
3299 # if the file was seeked to before the end. See issue4943 for more.
3311 # if the file was seeked to before the end. See issue4943 for more.
3300 #
3312 #
3301 # We work around this issue by inserting a seek() before writing.
3313 # We work around this issue by inserting a seek() before writing.
3302 # Note: This is likely not necessary on Python 3. However, because
3314 # Note: This is likely not necessary on Python 3. However, because
3303 # the file handle is reused for reads and may be seeked there, we need
3315 # the file handle is reused for reads and may be seeked there, we need
3304 # to be careful before changing this.
3316 # to be careful before changing this.
3305 index_end = data_end = sidedata_end = None
3317 index_end = data_end = sidedata_end = None
3306 if self._docket is not None:
3318 if self._docket is not None:
3307 index_end = self._docket.index_end
3319 index_end = self._docket.index_end
3308 data_end = self._docket.data_end
3320 data_end = self._docket.data_end
3309 sidedata_end = self._docket.sidedata_end
3321 sidedata_end = self._docket.sidedata_end
3310
3322
3311 files_end = self._inner.write_entry(
3323 files_end = self._inner.write_entry(
3312 transaction,
3324 transaction,
3313 entry,
3325 entry,
3314 data,
3326 data,
3315 link,
3327 link,
3316 offset,
3328 offset,
3317 sidedata,
3329 sidedata,
3318 sidedata_offset,
3330 sidedata_offset,
3319 index_end,
3331 index_end,
3320 data_end,
3332 data_end,
3321 sidedata_end,
3333 sidedata_end,
3322 )
3334 )
3323 self._enforceinlinesize(transaction)
3335 self._enforceinlinesize(transaction)
3324 if self._docket is not None:
3336 if self._docket is not None:
3325 self._docket.index_end = files_end[0]
3337 self._docket.index_end = files_end[0]
3326 self._docket.data_end = files_end[1]
3338 self._docket.data_end = files_end[1]
3327 self._docket.sidedata_end = files_end[2]
3339 self._docket.sidedata_end = files_end[2]
3328
3340
3329 nodemaputil.setup_persistent_nodemap(transaction, self)
3341 nodemaputil.setup_persistent_nodemap(transaction, self)
3330
3342
3331 def addgroup(
3343 def addgroup(
3332 self,
3344 self,
3333 deltas,
3345 deltas,
3334 linkmapper,
3346 linkmapper,
3335 transaction,
3347 transaction,
3336 alwayscache=False,
3348 alwayscache=False,
3337 addrevisioncb=None,
3349 addrevisioncb=None,
3338 duplicaterevisioncb=None,
3350 duplicaterevisioncb=None,
3339 debug_info=None,
3351 debug_info=None,
3340 delta_base_reuse_policy=None,
3352 delta_base_reuse_policy=None,
3341 ):
3353 ):
3342 """
3354 """
3343 add a delta group
3355 add a delta group
3344
3356
3345 Given a set of deltas, add them to the revision log. The
3357 Given a set of deltas, add them to the revision log. The
3346 first delta is against its parent, which should be in our
3358 first delta is against its parent, which should be in our
3347 log; the rest are against the previous delta.
3359 log; the rest are against the previous delta.
3348
3360
3349 If ``addrevisioncb`` is defined, it will be called with two arguments:
3361 If ``addrevisioncb`` is defined, it will be called with two arguments:
3350 this revlog and the revision number that was added.
3362 this revlog and the revision number that was added.
3351 """
3363 """
3352
3364
3353 if self._adding_group:
3365 if self._adding_group:
3354 raise error.ProgrammingError(b'cannot nest addgroup() calls')
3366 raise error.ProgrammingError(b'cannot nest addgroup() calls')
3355
3367
3356 # read the default delta-base reuse policy from revlog config if the
3368 # read the default delta-base reuse policy from revlog config if the
3357 # group did not specify one.
3369 # group did not specify one.
3358 if delta_base_reuse_policy is None:
3370 if delta_base_reuse_policy is None:
3359 if (
3371 if (
3360 self.delta_config.general_delta
3372 self.delta_config.general_delta
3361 and self.delta_config.lazy_delta_base
3373 and self.delta_config.lazy_delta_base
3362 ):
3374 ):
3363 delta_base_reuse_policy = DELTA_BASE_REUSE_TRY
3375 delta_base_reuse_policy = DELTA_BASE_REUSE_TRY
3364 else:
3376 else:
3365 delta_base_reuse_policy = DELTA_BASE_REUSE_NO
3377 delta_base_reuse_policy = DELTA_BASE_REUSE_NO
3366
3378
3367 self._adding_group = True
3379 self._adding_group = True
3368 empty = True
3380 empty = True
3369 try:
3381 try:
3370 with self._writing(transaction):
3382 with self._writing(transaction):
3371 write_debug = None
3383 write_debug = None
3372 if self.delta_config.debug_delta:
3384 if self.delta_config.debug_delta:
3373 write_debug = transaction._report
3385 write_debug = transaction._report
3374 deltacomputer = deltautil.deltacomputer(
3386 deltacomputer = deltautil.deltacomputer(
3375 self,
3387 self,
3376 write_debug=write_debug,
3388 write_debug=write_debug,
3377 debug_info=debug_info,
3389 debug_info=debug_info,
3378 )
3390 )
3379 # loop through our set of deltas
3391 # loop through our set of deltas
3380 for data in deltas:
3392 for data in deltas:
3381 (
3393 (
3382 node,
3394 node,
3383 p1,
3395 p1,
3384 p2,
3396 p2,
3385 linknode,
3397 linknode,
3386 deltabase,
3398 deltabase,
3387 delta,
3399 delta,
3388 flags,
3400 flags,
3389 sidedata,
3401 sidedata,
3390 ) = data
3402 ) = data
3391 link = linkmapper(linknode)
3403 link = linkmapper(linknode)
3392 flags = flags or REVIDX_DEFAULT_FLAGS
3404 flags = flags or REVIDX_DEFAULT_FLAGS
3393
3405
3394 rev = self.index.get_rev(node)
3406 rev = self.index.get_rev(node)
3395 if rev is not None:
3407 if rev is not None:
3396 # this can happen if two branches make the same change
3408 # this can happen if two branches make the same change
3397 self._nodeduplicatecallback(transaction, rev)
3409 self._nodeduplicatecallback(transaction, rev)
3398 if duplicaterevisioncb:
3410 if duplicaterevisioncb:
3399 duplicaterevisioncb(self, rev)
3411 duplicaterevisioncb(self, rev)
3400 empty = False
3412 empty = False
3401 continue
3413 continue
3402
3414
3403 for p in (p1, p2):
3415 for p in (p1, p2):
3404 if not self.index.has_node(p):
3416 if not self.index.has_node(p):
3405 raise error.LookupError(
3417 raise error.LookupError(
3406 p, self.radix, _(b'unknown parent')
3418 p, self.radix, _(b'unknown parent')
3407 )
3419 )
3408
3420
3409 if not self.index.has_node(deltabase):
3421 if not self.index.has_node(deltabase):
3410 raise error.LookupError(
3422 raise error.LookupError(
3411 deltabase, self.display_id, _(b'unknown delta base')
3423 deltabase, self.display_id, _(b'unknown delta base')
3412 )
3424 )
3413
3425
3414 baserev = self.rev(deltabase)
3426 baserev = self.rev(deltabase)
3415
3427
3416 if baserev != nullrev and self.iscensored(baserev):
3428 if baserev != nullrev and self.iscensored(baserev):
3417 # if base is censored, delta must be full replacement in a
3429 # if base is censored, delta must be full replacement in a
3418 # single patch operation
3430 # single patch operation
3419 hlen = struct.calcsize(b">lll")
3431 hlen = struct.calcsize(b">lll")
3420 oldlen = self.rawsize(baserev)
3432 oldlen = self.rawsize(baserev)
3421 newlen = len(delta) - hlen
3433 newlen = len(delta) - hlen
3422 if delta[:hlen] != mdiff.replacediffheader(
3434 if delta[:hlen] != mdiff.replacediffheader(
3423 oldlen, newlen
3435 oldlen, newlen
3424 ):
3436 ):
3425 raise error.CensoredBaseError(
3437 raise error.CensoredBaseError(
3426 self.display_id, self.node(baserev)
3438 self.display_id, self.node(baserev)
3427 )
3439 )
3428
3440
3429 if not flags and self._peek_iscensored(baserev, delta):
3441 if not flags and self._peek_iscensored(baserev, delta):
3430 flags |= REVIDX_ISCENSORED
3442 flags |= REVIDX_ISCENSORED
3431
3443
3432 # We assume consumers of addrevisioncb will want to retrieve
3444 # We assume consumers of addrevisioncb will want to retrieve
3433 # the added revision, which will require a call to
3445 # the added revision, which will require a call to
3434 # revision(). revision() will fast path if there is a cache
3446 # revision(). revision() will fast path if there is a cache
3435 # hit. So, we tell _addrevision() to always cache in this case.
3447 # hit. So, we tell _addrevision() to always cache in this case.
3436 # We're only using addgroup() in the context of changegroup
3448 # We're only using addgroup() in the context of changegroup
3437 # generation so the revision data can always be handled as raw
3449 # generation so the revision data can always be handled as raw
3438 # by the flagprocessor.
3450 # by the flagprocessor.
3439 rev = self._addrevision(
3451 rev = self._addrevision(
3440 node,
3452 node,
3441 None,
3453 None,
3442 transaction,
3454 transaction,
3443 link,
3455 link,
3444 p1,
3456 p1,
3445 p2,
3457 p2,
3446 flags,
3458 flags,
3447 (baserev, delta, delta_base_reuse_policy),
3459 (baserev, delta, delta_base_reuse_policy),
3448 alwayscache=alwayscache,
3460 alwayscache=alwayscache,
3449 deltacomputer=deltacomputer,
3461 deltacomputer=deltacomputer,
3450 sidedata=sidedata,
3462 sidedata=sidedata,
3451 )
3463 )
3452
3464
3453 if addrevisioncb:
3465 if addrevisioncb:
3454 addrevisioncb(self, rev)
3466 addrevisioncb(self, rev)
3455 empty = False
3467 empty = False
3456 finally:
3468 finally:
3457 self._adding_group = False
3469 self._adding_group = False
3458 return not empty
3470 return not empty
3459
3471
3460 def iscensored(self, rev):
3472 def iscensored(self, rev):
3461 """Check if a file revision is censored."""
3473 """Check if a file revision is censored."""
3462 if not self.feature_config.censorable:
3474 if not self.feature_config.censorable:
3463 return False
3475 return False
3464
3476
3465 return self.flags(rev) & REVIDX_ISCENSORED
3477 return self.flags(rev) & REVIDX_ISCENSORED
3466
3478
3467 def _peek_iscensored(self, baserev, delta):
3479 def _peek_iscensored(self, baserev, delta):
3468 """Quickly check if a delta produces a censored revision."""
3480 """Quickly check if a delta produces a censored revision."""
3469 if not self.feature_config.censorable:
3481 if not self.feature_config.censorable:
3470 return False
3482 return False
3471
3483
3472 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
3484 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
3473
3485
3474 def getstrippoint(self, minlink):
3486 def getstrippoint(self, minlink):
3475 """find the minimum rev that must be stripped to strip the linkrev
3487 """find the minimum rev that must be stripped to strip the linkrev
3476
3488
3477 Returns a tuple containing the minimum rev and a set of all revs that
3489 Returns a tuple containing the minimum rev and a set of all revs that
3478 have linkrevs that will be broken by this strip.
3490 have linkrevs that will be broken by this strip.
3479 """
3491 """
3480 return storageutil.resolvestripinfo(
3492 return storageutil.resolvestripinfo(
3481 minlink,
3493 minlink,
3482 len(self) - 1,
3494 len(self) - 1,
3483 self.headrevs(),
3495 self.headrevs(),
3484 self.linkrev,
3496 self.linkrev,
3485 self.parentrevs,
3497 self.parentrevs,
3486 )
3498 )
3487
3499
3488 def strip(self, minlink, transaction):
3500 def strip(self, minlink, transaction):
3489 """truncate the revlog on the first revision with a linkrev >= minlink
3501 """truncate the revlog on the first revision with a linkrev >= minlink
3490
3502
3491 This function is called when we're stripping revision minlink and
3503 This function is called when we're stripping revision minlink and
3492 its descendants from the repository.
3504 its descendants from the repository.
3493
3505
3494 We have to remove all revisions with linkrev >= minlink, because
3506 We have to remove all revisions with linkrev >= minlink, because
3495 the equivalent changelog revisions will be renumbered after the
3507 the equivalent changelog revisions will be renumbered after the
3496 strip.
3508 strip.
3497
3509
3498 So we truncate the revlog on the first of these revisions, and
3510 So we truncate the revlog on the first of these revisions, and
3499 trust that the caller has saved the revisions that shouldn't be
3511 trust that the caller has saved the revisions that shouldn't be
3500 removed and that it'll re-add them after this truncation.
3512 removed and that it'll re-add them after this truncation.
3501 """
3513 """
3502 if len(self) == 0:
3514 if len(self) == 0:
3503 return
3515 return
3504
3516
3505 rev, _ = self.getstrippoint(minlink)
3517 rev, _ = self.getstrippoint(minlink)
3506 if rev == len(self):
3518 if rev == len(self):
3507 return
3519 return
3508
3520
3509 # first truncate the files on disk
3521 # first truncate the files on disk
3510 data_end = self.start(rev)
3522 data_end = self.start(rev)
3511 if not self._inline:
3523 if not self._inline:
3512 transaction.add(self._datafile, data_end)
3524 transaction.add(self._datafile, data_end)
3513 end = rev * self.index.entry_size
3525 end = rev * self.index.entry_size
3514 else:
3526 else:
3515 end = data_end + (rev * self.index.entry_size)
3527 end = data_end + (rev * self.index.entry_size)
3516
3528
3517 if self._sidedatafile:
3529 if self._sidedatafile:
3518 sidedata_end = self.sidedata_cut_off(rev)
3530 sidedata_end = self.sidedata_cut_off(rev)
3519 transaction.add(self._sidedatafile, sidedata_end)
3531 transaction.add(self._sidedatafile, sidedata_end)
3520
3532
3521 transaction.add(self._indexfile, end)
3533 transaction.add(self._indexfile, end)
3522 if self._docket is not None:
3534 if self._docket is not None:
3523 # XXX we could leverage the docket while stripping. However, it is
3535 # XXX we could leverage the docket while stripping. However, it is
3524 # not powerful enough at the time of this comment.
3536 # not powerful enough at the time of this comment.
3525 self._docket.index_end = end
3537 self._docket.index_end = end
3526 self._docket.data_end = data_end
3538 self._docket.data_end = data_end
3527 self._docket.sidedata_end = sidedata_end
3539 self._docket.sidedata_end = sidedata_end
3528 self._docket.write(transaction, stripping=True)
3540 self._docket.write(transaction, stripping=True)
3529
3541
3530 # then reset internal state in memory to forget those revisions
3542 # then reset internal state in memory to forget those revisions
3531 self._chaininfocache = util.lrucachedict(500)
3543 self._chaininfocache = util.lrucachedict(500)
3532 self._inner.clear_cache()
3544 self._inner.clear_cache()
3533
3545
3534 del self.index[rev:-1]
3546 del self.index[rev:-1]
3535
3547
3536 def checksize(self):
3548 def checksize(self):
3537 """Check size of index and data files
3549 """Check size of index and data files
3538
3550
3539 return a (dd, di) tuple.
3551 return a (dd, di) tuple.
3540 - dd: extra bytes for the "data" file
3552 - dd: extra bytes for the "data" file
3541 - di: extra bytes for the "index" file
3553 - di: extra bytes for the "index" file
3542
3554
3543 A healthy revlog will return (0, 0).
3555 A healthy revlog will return (0, 0).
3544 """
3556 """
3545 expected = 0
3557 expected = 0
3546 if len(self):
3558 if len(self):
3547 expected = max(0, self.end(len(self) - 1))
3559 expected = max(0, self.end(len(self) - 1))
3548
3560
3549 try:
3561 try:
3550 with self._datafp() as f:
3562 with self._datafp() as f:
3551 f.seek(0, io.SEEK_END)
3563 f.seek(0, io.SEEK_END)
3552 actual = f.tell()
3564 actual = f.tell()
3553 dd = actual - expected
3565 dd = actual - expected
3554 except FileNotFoundError:
3566 except FileNotFoundError:
3555 dd = 0
3567 dd = 0
3556
3568
3557 try:
3569 try:
3558 f = self.opener(self._indexfile)
3570 f = self.opener(self._indexfile)
3559 f.seek(0, io.SEEK_END)
3571 f.seek(0, io.SEEK_END)
3560 actual = f.tell()
3572 actual = f.tell()
3561 f.close()
3573 f.close()
3562 s = self.index.entry_size
3574 s = self.index.entry_size
3563 i = max(0, actual // s)
3575 i = max(0, actual // s)
3564 di = actual - (i * s)
3576 di = actual - (i * s)
3565 if self._inline:
3577 if self._inline:
3566 databytes = 0
3578 databytes = 0
3567 for r in self:
3579 for r in self:
3568 databytes += max(0, self.length(r))
3580 databytes += max(0, self.length(r))
3569 dd = 0
3581 dd = 0
3570 di = actual - len(self) * s - databytes
3582 di = actual - len(self) * s - databytes
3571 except FileNotFoundError:
3583 except FileNotFoundError:
3572 di = 0
3584 di = 0
3573
3585
3574 return (dd, di)
3586 return (dd, di)
3575
3587
3576 def files(self):
3588 def files(self):
3577 """return list of files that compose this revlog"""
3589 """return list of files that compose this revlog"""
3578 res = [self._indexfile]
3590 res = [self._indexfile]
3579 if self._docket_file is None:
3591 if self._docket_file is None:
3580 if not self._inline:
3592 if not self._inline:
3581 res.append(self._datafile)
3593 res.append(self._datafile)
3582 else:
3594 else:
3583 res.append(self._docket_file)
3595 res.append(self._docket_file)
3584 res.extend(self._docket.old_index_filepaths(include_empty=False))
3596 res.extend(self._docket.old_index_filepaths(include_empty=False))
3585 if self._docket.data_end:
3597 if self._docket.data_end:
3586 res.append(self._datafile)
3598 res.append(self._datafile)
3587 res.extend(self._docket.old_data_filepaths(include_empty=False))
3599 res.extend(self._docket.old_data_filepaths(include_empty=False))
3588 if self._docket.sidedata_end:
3600 if self._docket.sidedata_end:
3589 res.append(self._sidedatafile)
3601 res.append(self._sidedatafile)
3590 res.extend(self._docket.old_sidedata_filepaths(include_empty=False))
3602 res.extend(self._docket.old_sidedata_filepaths(include_empty=False))
3591 return res
3603 return res
3592
3604
3593 def emitrevisions(
3605 def emitrevisions(
3594 self,
3606 self,
3595 nodes,
3607 nodes,
3596 nodesorder=None,
3608 nodesorder=None,
3597 revisiondata=False,
3609 revisiondata=False,
3598 assumehaveparentrevisions=False,
3610 assumehaveparentrevisions=False,
3599 deltamode=repository.CG_DELTAMODE_STD,
3611 deltamode=repository.CG_DELTAMODE_STD,
3600 sidedata_helpers=None,
3612 sidedata_helpers=None,
3601 debug_info=None,
3613 debug_info=None,
3602 ):
3614 ):
3603 if nodesorder not in (b'nodes', b'storage', b'linear', None):
3615 if nodesorder not in (b'nodes', b'storage', b'linear', None):
3604 raise error.ProgrammingError(
3616 raise error.ProgrammingError(
3605 b'unhandled value for nodesorder: %s' % nodesorder
3617 b'unhandled value for nodesorder: %s' % nodesorder
3606 )
3618 )
3607
3619
3608 if nodesorder is None and not self.delta_config.general_delta:
3620 if nodesorder is None and not self.delta_config.general_delta:
3609 nodesorder = b'storage'
3621 nodesorder = b'storage'
3610
3622
3611 if (
3623 if (
3612 not self._storedeltachains
3624 not self._storedeltachains
3613 and deltamode != repository.CG_DELTAMODE_PREV
3625 and deltamode != repository.CG_DELTAMODE_PREV
3614 ):
3626 ):
3615 deltamode = repository.CG_DELTAMODE_FULL
3627 deltamode = repository.CG_DELTAMODE_FULL
3616
3628
3617 return storageutil.emitrevisions(
3629 return storageutil.emitrevisions(
3618 self,
3630 self,
3619 nodes,
3631 nodes,
3620 nodesorder,
3632 nodesorder,
3621 revlogrevisiondelta,
3633 revlogrevisiondelta,
3622 deltaparentfn=self.deltaparent,
3634 deltaparentfn=self.deltaparent,
3623 candeltafn=self._candelta,
3635 candeltafn=self._candelta,
3624 rawsizefn=self.rawsize,
3636 rawsizefn=self.rawsize,
3625 revdifffn=self.revdiff,
3637 revdifffn=self.revdiff,
3626 flagsfn=self.flags,
3638 flagsfn=self.flags,
3627 deltamode=deltamode,
3639 deltamode=deltamode,
3628 revisiondata=revisiondata,
3640 revisiondata=revisiondata,
3629 assumehaveparentrevisions=assumehaveparentrevisions,
3641 assumehaveparentrevisions=assumehaveparentrevisions,
3630 sidedata_helpers=sidedata_helpers,
3642 sidedata_helpers=sidedata_helpers,
3631 debug_info=debug_info,
3643 debug_info=debug_info,
3632 )
3644 )
3633
3645
3634 DELTAREUSEALWAYS = b'always'
3646 DELTAREUSEALWAYS = b'always'
3635 DELTAREUSESAMEREVS = b'samerevs'
3647 DELTAREUSESAMEREVS = b'samerevs'
3636 DELTAREUSENEVER = b'never'
3648 DELTAREUSENEVER = b'never'
3637
3649
3638 DELTAREUSEFULLADD = b'fulladd'
3650 DELTAREUSEFULLADD = b'fulladd'
3639
3651
3640 DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}
3652 DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}
3641
3653
3642 def clone(
3654 def clone(
3643 self,
3655 self,
3644 tr,
3656 tr,
3645 destrevlog,
3657 destrevlog,
3646 addrevisioncb=None,
3658 addrevisioncb=None,
3647 deltareuse=DELTAREUSESAMEREVS,
3659 deltareuse=DELTAREUSESAMEREVS,
3648 forcedeltabothparents=None,
3660 forcedeltabothparents=None,
3649 sidedata_helpers=None,
3661 sidedata_helpers=None,
3650 ):
3662 ):
3651 """Copy this revlog to another, possibly with format changes.
3663 """Copy this revlog to another, possibly with format changes.
3652
3664
3653 The destination revlog will contain the same revisions and nodes.
3665 The destination revlog will contain the same revisions and nodes.
3654 However, it may not be bit-for-bit identical due to e.g. delta encoding
3666 However, it may not be bit-for-bit identical due to e.g. delta encoding
3655 differences.
3667 differences.
3656
3668
3657 The ``deltareuse`` argument controls how deltas from the existing revlog
3669 The ``deltareuse`` argument controls how deltas from the existing revlog
3658 are preserved in the destination revlog. The argument can have the
3670 are preserved in the destination revlog. The argument can have the
3659 following values:
3671 following values:
3660
3672
3661 DELTAREUSEALWAYS
3673 DELTAREUSEALWAYS
3662 Deltas will always be reused (if possible), even if the destination
3674 Deltas will always be reused (if possible), even if the destination
3663 revlog would not select the same revisions for the delta. This is the
3675 revlog would not select the same revisions for the delta. This is the
3664 fastest mode of operation.
3676 fastest mode of operation.
3665 DELTAREUSESAMEREVS
3677 DELTAREUSESAMEREVS
3666 Deltas will be reused if the destination revlog would pick the same
3678 Deltas will be reused if the destination revlog would pick the same
3667 revisions for the delta. This mode strikes a balance between speed
3679 revisions for the delta. This mode strikes a balance between speed
3668 and optimization.
3680 and optimization.
3669 DELTAREUSENEVER
3681 DELTAREUSENEVER
3670 Deltas will never be reused. This is the slowest mode of execution.
3682 Deltas will never be reused. This is the slowest mode of execution.
3671 This mode can be used to recompute deltas (e.g. if the diff/delta
3683 This mode can be used to recompute deltas (e.g. if the diff/delta
3672 algorithm changes).
3684 algorithm changes).
3673 DELTAREUSEFULLADD
3685 DELTAREUSEFULLADD
3674 Revisions will be re-added as if they were new content. This is
3686 Revisions will be re-added as if they were new content. This is
3675 slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
3687 slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
3676 e.g. large file detection and handling.
3688 e.g. large file detection and handling.
3677
3689
3678 Delta computation can be slow, so the choice of delta reuse policy can
3690 Delta computation can be slow, so the choice of delta reuse policy can
3679 significantly affect run time.
3691 significantly affect run time.
3680
3692
3681 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
3693 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
3682 two extremes. Deltas will be reused if they are appropriate. But if the
3694 two extremes. Deltas will be reused if they are appropriate. But if the
3683 delta could choose a better revision, it will do so. This means if you
3695 delta could choose a better revision, it will do so. This means if you
3684 are converting a non-generaldelta revlog to a generaldelta revlog,
3696 are converting a non-generaldelta revlog to a generaldelta revlog,
3685 deltas will be recomputed if the delta's parent isn't a parent of the
3697 deltas will be recomputed if the delta's parent isn't a parent of the
3686 revision.
3698 revision.
3687
3699
3688 In addition to the delta policy, the ``forcedeltabothparents``
3700 In addition to the delta policy, the ``forcedeltabothparents``
3689 argument controls whether to force computing deltas against both parents
3701 argument controls whether to force computing deltas against both parents
3690 for merges. If unset, the destination revlog's current setting is used.
3702 for merges. If unset, the destination revlog's current setting is used.
3691
3703
3692 See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
3704 See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
3693 `sidedata_helpers`.
3705 `sidedata_helpers`.
3694 """
3706 """
3695 if deltareuse not in self.DELTAREUSEALL:
3707 if deltareuse not in self.DELTAREUSEALL:
3696 raise ValueError(
3708 raise ValueError(
3697 _(b'value for deltareuse invalid: %s') % deltareuse
3709 _(b'value for deltareuse invalid: %s') % deltareuse
3698 )
3710 )
3699
3711
3700 if len(destrevlog):
3712 if len(destrevlog):
3701 raise ValueError(_(b'destination revlog is not empty'))
3713 raise ValueError(_(b'destination revlog is not empty'))
3702
3714
3703 if getattr(self, 'filteredrevs', None):
3715 if getattr(self, 'filteredrevs', None):
3704 raise ValueError(_(b'source revlog has filtered revisions'))
3716 raise ValueError(_(b'source revlog has filtered revisions'))
3705 if getattr(destrevlog, 'filteredrevs', None):
3717 if getattr(destrevlog, 'filteredrevs', None):
3706 raise ValueError(_(b'destination revlog has filtered revisions'))
3718 raise ValueError(_(b'destination revlog has filtered revisions'))
3707
3719
3708 # lazydelta and lazydeltabase controls whether to reuse a cached delta,
3720 # lazydelta and lazydeltabase controls whether to reuse a cached delta,
3709 # if possible.
3721 # if possible.
3710 old_delta_config = destrevlog.delta_config
3722 old_delta_config = destrevlog.delta_config
3711 destrevlog.delta_config = destrevlog.delta_config.copy()
3723 destrevlog.delta_config = destrevlog.delta_config.copy()
3712
3724
3713 try:
3725 try:
3714 if deltareuse == self.DELTAREUSEALWAYS:
3726 if deltareuse == self.DELTAREUSEALWAYS:
3715 destrevlog.delta_config.lazy_delta_base = True
3727 destrevlog.delta_config.lazy_delta_base = True
3716 destrevlog.delta_config.lazy_delta = True
3728 destrevlog.delta_config.lazy_delta = True
3717 elif deltareuse == self.DELTAREUSESAMEREVS:
3729 elif deltareuse == self.DELTAREUSESAMEREVS:
3718 destrevlog.delta_config.lazy_delta_base = False
3730 destrevlog.delta_config.lazy_delta_base = False
3719 destrevlog.delta_config.lazy_delta = True
3731 destrevlog.delta_config.lazy_delta = True
3720 elif deltareuse == self.DELTAREUSENEVER:
3732 elif deltareuse == self.DELTAREUSENEVER:
3721 destrevlog.delta_config.lazy_delta_base = False
3733 destrevlog.delta_config.lazy_delta_base = False
3722 destrevlog.delta_config.lazy_delta = False
3734 destrevlog.delta_config.lazy_delta = False
3723
3735
3724 delta_both_parents = (
3736 delta_both_parents = (
3725 forcedeltabothparents or old_delta_config.delta_both_parents
3737 forcedeltabothparents or old_delta_config.delta_both_parents
3726 )
3738 )
3727 destrevlog.delta_config.delta_both_parents = delta_both_parents
3739 destrevlog.delta_config.delta_both_parents = delta_both_parents
3728
3740
3729 with self.reading(), destrevlog._writing(tr):
3741 with self.reading(), destrevlog._writing(tr):
3730 self._clone(
3742 self._clone(
3731 tr,
3743 tr,
3732 destrevlog,
3744 destrevlog,
3733 addrevisioncb,
3745 addrevisioncb,
3734 deltareuse,
3746 deltareuse,
3735 forcedeltabothparents,
3747 forcedeltabothparents,
3736 sidedata_helpers,
3748 sidedata_helpers,
3737 )
3749 )
3738
3750
3739 finally:
3751 finally:
3740 destrevlog.delta_config = old_delta_config
3752 destrevlog.delta_config = old_delta_config
3741
3753
3742 def _clone(
3754 def _clone(
3743 self,
3755 self,
3744 tr,
3756 tr,
3745 destrevlog,
3757 destrevlog,
3746 addrevisioncb,
3758 addrevisioncb,
3747 deltareuse,
3759 deltareuse,
3748 forcedeltabothparents,
3760 forcedeltabothparents,
3749 sidedata_helpers,
3761 sidedata_helpers,
3750 ):
3762 ):
3751 """perform the core duty of `revlog.clone` after parameter processing"""
3763 """perform the core duty of `revlog.clone` after parameter processing"""
3752 write_debug = None
3764 write_debug = None
        if self.delta_config.debug_delta:
            write_debug = tr._report
        deltacomputer = deltautil.deltacomputer(
            destrevlog,
            write_debug=write_debug,
        )
        index = self.index
        for rev in self:
            entry = index[rev]

            # Some classes override linkrev to take filtered revs into
            # account. Use raw entry from index.
            flags = entry[0] & 0xFFFF
            linkrev = entry[4]
            p1 = index[entry[5]][7]
            p2 = index[entry[6]][7]
            node = entry[7]

            # (Possibly) reuse the delta from the revlog if allowed and
            # the revlog chunk is a delta.
            cachedelta = None
            rawtext = None
            if deltareuse == self.DELTAREUSEFULLADD:
                text = self._revisiondata(rev)
                sidedata = self.sidedata(rev)

                if sidedata_helpers is not None:
                    (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
                        self, sidedata_helpers, sidedata, rev
                    )
                    flags = flags | new_flags[0] & ~new_flags[1]

                destrevlog.addrevision(
                    text,
                    tr,
                    linkrev,
                    p1,
                    p2,
                    cachedelta=cachedelta,
                    node=node,
                    flags=flags,
                    deltacomputer=deltacomputer,
                    sidedata=sidedata,
                )
            else:
                if destrevlog.delta_config.lazy_delta:
                    dp = self.deltaparent(rev)
                    if dp != nullrev:
                        cachedelta = (dp, bytes(self._inner._chunk(rev)))

                sidedata = None
                if not cachedelta:
                    try:
                        rawtext = self._revisiondata(rev)
                    except error.CensoredNodeError as censored:
                        assert flags & REVIDX_ISCENSORED
                        rawtext = censored.tombstone
                    sidedata = self.sidedata(rev)
                if sidedata is None:
                    sidedata = self.sidedata(rev)

                if sidedata_helpers is not None:
                    (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
                        self, sidedata_helpers, sidedata, rev
                    )
                    flags = flags | new_flags[0] & ~new_flags[1]

                destrevlog._addrevision(
                    node,
                    rawtext,
                    tr,
                    linkrev,
                    p1,
                    p2,
                    flags,
                    cachedelta,
                    deltacomputer=deltacomputer,
                    sidedata=sidedata,
                )

            if addrevisioncb:
                addrevisioncb(self, rev, node)

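Side note on the flag arithmetic used in both branches of the loop above: the sidedata helpers return a pair of bitmasks (flags to add, flags to remove), and since `&` binds tighter than `|` in Python, `flags | new_flags[0] & ~new_flags[1]` parses as `flags | (new_flags[0] & ~new_flags[1])`, i.e. the remove mask only filters the bits being added. A minimal standalone sketch with hypothetical flag bits (not the real revlog constants):

# Illustrative sketch, not part of revlog.py; flag values are made up.
TO_ADD = 0b0100
TO_REMOVE = 0b0001
existing = 0b0011

new_flags = (TO_ADD, TO_REMOVE)
result = existing | new_flags[0] & ~new_flags[1]
assert result == existing | (new_flags[0] & ~new_flags[1])
print(bin(result))  # 0b111: TO_ADD is set, pre-existing bits are left untouched
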
    def censorrevision(self, tr, censor_nodes, tombstone=b''):
        if self._format_version == REVLOGV0:
            raise error.RevlogError(
                _(b'cannot censor with version %d revlogs')
                % self._format_version
            )
        elif self._format_version == REVLOGV1:
            rewrite.v1_censor(self, tr, censor_nodes, tombstone)
        else:
            rewrite.v2_censor(self, tr, censor_nodes, tombstone)

    def verifyintegrity(self, state):
        """Verifies the integrity of the revlog.

        Yields ``revlogproblem`` instances describing problems that are
        found.
        """
        dd, di = self.checksize()
        if dd:
            yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
        if di:
            yield revlogproblem(error=_(b'index contains %d extra bytes') % di)

        version = self._format_version

        # The verifier tells us what version revlog we should be.
        if version != state[b'expectedversion']:
            yield revlogproblem(
                warning=_(b"warning: '%s' uses revlog format %d; expected %d")
                % (self.display_id, version, state[b'expectedversion'])
            )

        state[b'skipread'] = set()
        state[b'safe_renamed'] = set()

        for rev in self:
            node = self.node(rev)

            # Verify contents. 4 cases to care about:
            #
            #   common: the most common case
            #   rename: with a rename
            #   meta: file content starts with b'\1\n', the metadata
            #         header defined in filelog.py, but without a rename
            #   ext: content stored externally
            #
            # More formally, their differences are shown below:
            #
            #                       | common | rename | meta  | ext
            #  -------------------------------------------------------
            #  flags()              | 0      | 0      | 0     | not 0
            #  renamed()            | False  | True   | False | ?
            #  rawtext[0:2]=='\1\n' | False  | True   | True  | ?
            #
            # "rawtext" means the raw text stored in revlog data, which
            # could be retrieved by "rawdata(rev)". "text"
            # mentioned below is "revision(rev)".
            #
            # There are 3 different lengths stored physically:
            #  1. L1: rawsize, stored in revlog index
            #  2. L2: len(rawtext), stored in revlog data
            #  3. L3: len(text), stored in revlog data if flags==0, or
            #     possibly somewhere else if flags!=0
            #
            # L1 should be equal to L2. L3 could be different from them.
            # "text" may or may not affect commit hash depending on flag
            # processors (see flagutil.addflagprocessor).
            #
            #              | common | rename | meta  | ext
            # -------------------------------------------------
            # rawsize()    | L1     | L1     | L1    | L1
            # size()       | L1     | L2-LM  | L1(*) | L1 (?)
            # len(rawtext) | L2     | L2     | L2    | L2
            # len(text)    | L2     | L2     | L2    | L3
            # len(read())  | L2     | L2-LM  | L2-LM | L3 (?)
            #
            # LM:  length of metadata, depending on rawtext
            # (*): not ideal, see comment in filelog.size
            # (?): could be "- len(meta)" if the resolved content has
            #      rename metadata
            #
            # Checks needed to be done:
            #  1. length check: L1 == L2, in all cases.
            #  2. hash check: depending on flag processor, we may need to
            #     use either "text" (external), or "rawtext" (in revlog).

            try:
                skipflags = state.get(b'skipflags', 0)
                if skipflags:
                    skipflags &= self.flags(rev)

                _verify_revision(self, skipflags, state, node)

                l1 = self.rawsize(rev)
                l2 = len(self.rawdata(node))

                if l1 != l2:
                    yield revlogproblem(
                        error=_(b'unpacked size is %d, %d expected') % (l2, l1),
                        node=node,
                    )

            except error.CensoredNodeError:
                if state[b'erroroncensored']:
                    yield revlogproblem(
                        error=_(b'censored file data'), node=node
                    )
                    state[b'skipread'].add(node)
            except Exception as e:
                yield revlogproblem(
                    error=_(b'unpacking %s: %s')
                    % (short(node), stringutil.forcebytestr(e)),
                    node=node,
                )
                state[b'skipread'].add(node)

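As the comment block above spells out, the per-revision verification boils down to a length check (the indexed rawsize L1 must equal the length of the stored raw text L2) plus a hash check whose input depends on flag processors. A standalone sketch of just the length-check idea, using a hypothetical in-memory store rather than a real revlog:

# Illustrative sketch, not part of revlog.py.
from collections import namedtuple

Problem = namedtuple('Problem', 'error node')

def check_lengths(store, nodes):
    # Yield a Problem for every node whose indexed rawsize (L1) disagrees
    # with the length of the raw text actually stored (L2).
    for node in nodes:
        l1 = store['rawsize'][node]       # what the index claims
        l2 = len(store['rawtext'][node])  # what the data file holds
        if l1 != l2:
            yield Problem('unpacked size is %d, %d expected' % (l2, l1), node)

store = {
    'rawsize': {b'n1': 5, b'n2': 9},
    'rawtext': {b'n1': b'hello', b'n2': b'trun'},
}
print(list(check_lengths(store, [b'n1', b'n2'])))  # only n2 is reported
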
    def storageinfo(
        self,
        exclusivefiles=False,
        sharedfiles=False,
        revisionscount=False,
        trackedsize=False,
        storedsize=False,
    ):
        d = {}

        if exclusivefiles:
            d[b'exclusivefiles'] = [(self.opener, self._indexfile)]
            if not self._inline:
                d[b'exclusivefiles'].append((self.opener, self._datafile))

        if sharedfiles:
            d[b'sharedfiles'] = []

        if revisionscount:
            d[b'revisionscount'] = len(self)

        if trackedsize:
            d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))

        if storedsize:
            d[b'storedsize'] = sum(
                self.opener.stat(path).st_size for path in self.files()
            )

        return d

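Each keyword argument of `storageinfo` just opts in to one key of the returned dict, so callers only pay for the statistics they request. A small self-contained sketch of the same opt-in pattern (the data and helper are hypothetical, but the keys mirror the ones built above):

# Illustrative sketch, not part of revlog.py.
def storage_stats(file_sizes, raw_sizes, revisionscount=False,
                  trackedsize=False, storedsize=False):
    d = {}
    if revisionscount:
        d['revisionscount'] = len(raw_sizes)        # number of revisions
    if trackedsize:
        d['trackedsize'] = sum(raw_sizes)           # sum of rawsize over revisions
    if storedsize:
        d['storedsize'] = sum(file_sizes.values())  # on-disk size of the files
    return d

print(storage_stats({'x.i': 120, 'x.d': 480}, [10, 20, 30],
                    revisionscount=True, storedsize=True))
# {'revisionscount': 3, 'storedsize': 600} -- 'trackedsize' absent, it was not requested
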
    def rewrite_sidedata(self, transaction, helpers, startrev, endrev):
        if not self.feature_config.has_side_data:
            return
        # revlog formats with sidedata support do not support inline
        assert not self._inline
        if not helpers[1] and not helpers[2]:
            # Nothing to generate or remove
            return

        new_entries = []
        # append the new sidedata
        with self._writing(transaction):
            ifh, dfh, sdfh = self._inner._writinghandles
            dfh.seek(self._docket.sidedata_end, os.SEEK_SET)

            current_offset = sdfh.tell()
            for rev in range(startrev, endrev + 1):
                entry = self.index[rev]
                new_sidedata, flags = sidedatautil.run_sidedata_helpers(
                    store=self,
                    sidedata_helpers=helpers,
                    sidedata={},
                    rev=rev,
                )

                serialized_sidedata = sidedatautil.serialize_sidedata(
                    new_sidedata
                )

                sidedata_compression_mode = COMP_MODE_INLINE
                if serialized_sidedata and self.feature_config.has_side_data:
                    sidedata_compression_mode = COMP_MODE_PLAIN
                    h, comp_sidedata = self._inner.compress(serialized_sidedata)
                    if (
                        h != b'u'
                        and comp_sidedata[0] != b'\0'
                        and len(comp_sidedata) < len(serialized_sidedata)
                    ):
                        assert not h
                        if (
                            comp_sidedata[0]
                            == self._docket.default_compression_header
                        ):
                            sidedata_compression_mode = COMP_MODE_DEFAULT
                            serialized_sidedata = comp_sidedata
                        else:
                            sidedata_compression_mode = COMP_MODE_INLINE
                            serialized_sidedata = comp_sidedata
                if entry[8] != 0 or entry[9] != 0:
                    # rewriting entries that already have sidedata is not
                    # supported yet, because it introduces garbage data in the
                    # revlog.
                    msg = b"rewriting existing sidedata is not supported yet"
                    raise error.Abort(msg)

                # Apply (potential) flags to add and to remove after running
                # the sidedata helpers
                new_offset_flags = entry[0] | flags[0] & ~flags[1]
                entry_update = (
                    current_offset,
                    len(serialized_sidedata),
                    new_offset_flags,
                    sidedata_compression_mode,
                )

                # the sidedata computation might have moved the file cursors around
                sdfh.seek(current_offset, os.SEEK_SET)
                sdfh.write(serialized_sidedata)
                new_entries.append(entry_update)
                current_offset += len(serialized_sidedata)
            self._docket.sidedata_end = sdfh.tell()

            # rewrite the new index entries
            ifh.seek(startrev * self.index.entry_size)
            for i, e in enumerate(new_entries):
                rev = startrev + i
                self.index.replace_sidedata_info(rev, *e)
                packed = self.index.entry_binary(rev)
                if rev == 0 and self._docket is None:
                    header = self._format_flags | self._format_version
                    header = self.index.pack_header(header)
                    packed = header + packed
                ifh.write(packed)
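
The compression handling inside the loop above picks one of three storage modes for the serialized sidedata: keep the plain bytes when compressing does not pay off, rely on the docket's default compression when the compressed chunk starts with the default header, or otherwise store the compressed chunk with its own header inline. A rough standalone sketch of that decision, using zlib as a stand-in for the revlog's configured compressor (the mode names echo the constants used above, but their values here are illustrative):

# Illustrative sketch, not part of revlog.py; mode values are placeholders.
import zlib

COMP_MODE_PLAIN, COMP_MODE_DEFAULT, COMP_MODE_INLINE = 0, 1, 2

def choose_sidedata_mode(serialized, default_header=b'x'):
    # Keep the plain bytes unless compressing actually saves space, and
    # record how the payload should be read back later.
    if not serialized:
        return COMP_MODE_INLINE, serialized
    comp = zlib.compress(serialized)
    if len(comp) >= len(serialized):
        return COMP_MODE_PLAIN, serialized
    if comp[:1] == default_header:
        return COMP_MODE_DEFAULT, comp  # reader uses the default decompressor
    return COMP_MODE_INLINE, comp       # chunk carries its own header

mode, payload = choose_sidedata_mode(b'copy source: foo\n' * 20)
print(mode, len(payload))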