typing: lock in new pytype gains from making revlog related classes typeable...
Matt Harbison
r52719:0338fb20 default
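The commit's theme, in brief: give pytype concrete types to hold on to by annotating attributes at class scope and return types on accessors, instead of letting revlog-related objects decay to Any. Below is a minimal illustrative sketch of that pattern using made-up names (Index, Store, and get_index are hypothetical stand-ins, not Mercurial APIs):

from typing import Iterator


class Index:
    def start(self, rev: int) -> int:
        # placeholder body standing in for real index logic
        return rev * 64


class Store:
    # class-level annotation: the type checker now knows what _index is
    _index: Index

    def __init__(self) -> None:
        self._index = Index()

    def get_index(self) -> Index:
        # explicit return type, analogous to FileLog.get_revlog() below
        return self._index

    def __iter__(self) -> Iterator[int]:
        # annotated the way FileLog.__iter__ is, yielding revision numbers
        return iter(range(3))


def offset(store: Store, rev: int) -> int:
    # the checker can now verify this chain end-to-end: Store -> Index -> int
    return store.get_index().start(rev)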
@@ -1,480 +1,480 b''
# debugcommands.py - debug logic for remotefilelog
#
# Copyright 2013 Facebook, Inc.
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

import os
import zlib

from mercurial.node import (
    bin,
    hex,
    sha1nodeconstants,
    short,
)
from mercurial.i18n import _
from mercurial.pycompat import open
from mercurial import (
    error,
    filelog,
    lock as lockmod,
    pycompat,
    revlog,
)
from mercurial.utils import hashutil
from . import (
    constants,
    datapack,
    fileserverclient,
    historypack,
    repack,
    shallowutil,
)


def debugremotefilelog(ui, path, **opts) -> None:
    decompress = opts.get('decompress')

    size, firstnode, mapping = parsefileblob(path, decompress)

    ui.status(_(b"size: %d bytes\n") % size)
    ui.status(_(b"path: %s \n") % path)
    ui.status(_(b"key: %s \n") % (short(firstnode)))
    ui.status(_(b"\n"))
    ui.status(
        _(b"%12s => %12s %13s %13s %12s\n")
        % (b"node", b"p1", b"p2", b"linknode", b"copyfrom")
    )

    queue = [firstnode]
    while queue:
        node = queue.pop(0)
        p1, p2, linknode, copyfrom = mapping[node]
        ui.status(
            _(b"%s => %s %s %s %s\n")
            % (short(node), short(p1), short(p2), short(linknode), copyfrom)
        )
        if p1 != sha1nodeconstants.nullid:
            queue.append(p1)
        if p2 != sha1nodeconstants.nullid:
            queue.append(p2)


-def buildtemprevlog(repo, file):
+def buildtemprevlog(repo, file) -> filelog.FileLog:
    # get filename key
    filekey = hex(hashutil.sha1(file).digest())
    filedir = os.path.join(repo.path, b'store/data', filekey)

    # sort all entries based on linkrev
    fctxs = []
    for filenode in os.listdir(filedir):
        if b'_old' not in filenode:
            fctxs.append(repo.filectx(file, fileid=bin(filenode)))

    fctxs = sorted(fctxs, key=lambda x: x.linkrev())

    # add to revlog
    temppath = repo.sjoin(b'data/temprevlog.i')
    if os.path.exists(temppath):
        os.remove(temppath)
    r = filelog.filelog(repo.svfs, b'temprevlog')

    class faket:
        def add(self, a, b, c):
            pass

    t = faket()
    for fctx in fctxs:
        if fctx.node() not in repo:
            continue

        p = fctx.filelog().parents(fctx.filenode())
        meta = {}
        if fctx.renamed():
            meta[b'copy'] = fctx.renamed()[0]
            meta[b'copyrev'] = hex(fctx.renamed()[1])

        r.add(fctx.data(), meta, t, fctx.linkrev(), p[0], p[1])

    return r


def debugindex(orig, ui, repo, file_=None, **opts):
    """dump the contents of an index file"""
    if (
        opts.get('changelog')
        or opts.get('manifest')
        or opts.get('dir')
        or not shallowutil.isenabled(repo)
        or not repo.shallowmatch(file_)
    ):
        return orig(ui, repo, file_, **opts)

    r = buildtemprevlog(repo, file_)

    # debugindex like normal
    format = opts.get('format', 0)
    if format not in (0, 1):
        raise error.Abort(_(b"unknown format %d") % format)

    generaldelta = r.get_revlog()._format_flags & revlog.FLAG_GENERALDELTA
    if generaldelta:
        basehdr = b' delta'
    else:
        basehdr = b' base'

    if format == 0:
        ui.write(
            (
                b" rev offset length " + basehdr + b" linkrev"
                b" nodeid p1 p2\n"
            )
        )
    elif format == 1:
        ui.write(
            (
                b" rev flag offset length"
                b" size " + basehdr + b" link p1 p2"
                b" nodeid\n"
            )
        )

    for i in r:
        node = r.node(i)
        if generaldelta:
            base = r.get_revlog().deltaparent(i)
        else:
            base = r.get_revlog().chainbase(i)
        if format == 0:
            try:
                pp = r.parents(node)
            except Exception:
                pp = [repo.nullid, repo.nullid]
            ui.write(
                b"% 6d % 9d % 7d % 6d % 7d %s %s %s\n"
                % (
                    i,
                    r.get_revlog().start(i),
                    r.get_revlog().length(i),
                    base,
                    r.linkrev(i),
                    short(node),
                    short(pp[0]),
                    short(pp[1]),
                )
            )
        elif format == 1:
            pr = r.parentrevs(i)
            ui.write(
                b"% 6d %04x % 8d % 8d % 8d % 6d % 6d % 6d % 6d %s\n"
                % (
                    i,
                    r.get_revlog().flags(i),
                    r.get_revlog().start(i),
                    r.get_revlog().length(i),
                    r.get_revlog().rawsize(i),
                    base,
                    r.linkrev(i),
                    pr[0],
                    pr[1],
                    short(node),
                )
            )


def debugindexdot(orig, ui, repo, file_):
    """dump an index DAG as a graphviz dot file"""
    if not shallowutil.isenabled(repo):
        return orig(ui, repo, file_)

    r = buildtemprevlog(repo, os.path.basename(file_)[:-2])

    ui.writenoi18n(b"digraph G {\n")
    for i in r:
        node = r.node(i)
        pp = r.parents(node)
        ui.write(b"\t%d -> %d\n" % (r.rev(pp[0]), i))
        if pp[1] != repo.nullid:
            ui.write(b"\t%d -> %d\n" % (r.rev(pp[1]), i))
    ui.write(b"}\n")


def verifyremotefilelog(ui, path, **opts):
    decompress = opts.get('decompress')

    for root, dirs, files in os.walk(path):
        for file in files:
            if file == b"repos":
                continue
            filepath = os.path.join(root, file)
            size, firstnode, mapping = parsefileblob(filepath, decompress)
            for p1, p2, linknode, copyfrom in mapping.values():
                if linknode == sha1nodeconstants.nullid:
                    actualpath = os.path.relpath(root, path)
                    key = fileserverclient.getcachekey(
                        b"reponame", actualpath, file
                    )
                    ui.status(
                        b"%s %s\n" % (key, os.path.relpath(filepath, path))
                    )


def _decompressblob(raw):
    return zlib.decompress(raw)


def parsefileblob(path, decompress):
    f = open(path, b"rb")
    try:
        raw = f.read()
    finally:
        f.close()

    if decompress:
        raw = _decompressblob(raw)

    offset, size, flags = shallowutil.parsesizeflags(raw)
    start = offset + size

    firstnode = None

    mapping = {}
    while start < len(raw):
        divider = raw.index(b'\0', start + 80)

        currentnode = raw[start : (start + 20)]
        if not firstnode:
            firstnode = currentnode

        p1 = raw[(start + 20) : (start + 40)]
        p2 = raw[(start + 40) : (start + 60)]
        linknode = raw[(start + 60) : (start + 80)]
        copyfrom = raw[(start + 80) : divider]

        mapping[currentnode] = (p1, p2, linknode, copyfrom)
        start = divider + 1

    return size, firstnode, mapping


def debugdatapack(ui, *paths, **opts):
    for path in paths:
        if b'.data' in path:
            path = path[: path.index(b'.data')]
        ui.write(b"%s:\n" % path)
        dpack = datapack.datapack(path)
        node = opts.get('node')
        if node:
            deltachain = dpack.getdeltachain(b'', bin(node))
            dumpdeltachain(ui, deltachain, **opts)
            return

        if opts.get('long'):
            hashformatter = hex
            hashlen = 42
        else:
            hashformatter = short
            hashlen = 14

        lastfilename = None
        totaldeltasize = 0
        totalblobsize = 0

        def printtotals():
            if lastfilename is not None:
                ui.write(b"\n")
            if not totaldeltasize or not totalblobsize:
                return
            difference = totalblobsize - totaldeltasize
            deltastr = b"%0.1f%% %s" % (
                (100.0 * abs(difference) / totalblobsize),
                (b"smaller" if difference > 0 else b"bigger"),
            )

            ui.writenoi18n(
                b"Total:%s%s %s (%s)\n"
                % (
                    b"".ljust(2 * hashlen - len(b"Total:")),
                    (b'%d' % totaldeltasize).ljust(12),
                    (b'%d' % totalblobsize).ljust(9),
                    deltastr,
                )
            )

        bases = {}
        nodes = set()
        failures = 0
        for filename, node, deltabase, deltalen in dpack.iterentries():
            bases[node] = deltabase
            if node in nodes:
                ui.write((b"Bad entry: %s appears twice\n" % short(node)))
                failures += 1
            nodes.add(node)
            if filename != lastfilename:
                printtotals()
                name = b'(empty name)' if filename == b'' else filename
                ui.write(b"%s:\n" % name)
                ui.write(
                    b"%s%s%s%s\n"
                    % (
                        b"Node".ljust(hashlen),
                        b"Delta Base".ljust(hashlen),
                        b"Delta Length".ljust(14),
                        b"Blob Size".ljust(9),
                    )
                )
                lastfilename = filename
                totalblobsize = 0
                totaldeltasize = 0

            # Metadata could be missing, in which case it will be an empty dict.
            meta = dpack.getmeta(filename, node)
            if constants.METAKEYSIZE in meta:
                blobsize = meta[constants.METAKEYSIZE]
                totaldeltasize += deltalen
                totalblobsize += blobsize
            else:
                blobsize = b"(missing)"
            ui.write(
                b"%s %s %s%s\n"
                % (
                    hashformatter(node),
                    hashformatter(deltabase),
                    (b'%d' % deltalen).ljust(14),
                    pycompat.bytestr(blobsize),
                )
            )

        if filename is not None:
            printtotals()

        failures += _sanitycheck(ui, set(nodes), bases)
        if failures > 1:
            ui.warn((b"%d failures\n" % failures))
            return 1


def _sanitycheck(ui, nodes, bases):
    """
    Does some basic sanity checking on a packfiles with ``nodes`` ``bases`` (a
    mapping of node->base):

    - Each deltabase must itself be a node elsewhere in the pack
    - There must be no cycles
    """
    failures = 0
    for node in nodes:
        seen = set()
        current = node
        deltabase = bases[current]

        while deltabase != sha1nodeconstants.nullid:
            if deltabase not in nodes:
                ui.warn(
                    (
                        b"Bad entry: %s has an unknown deltabase (%s)\n"
                        % (short(node), short(deltabase))
                    )
                )
                failures += 1
                break

            if deltabase in seen:
                ui.warn(
                    (
                        b"Bad entry: %s has a cycle (at %s)\n"
                        % (short(node), short(deltabase))
                    )
                )
                failures += 1
                break

            current = deltabase
            seen.add(current)
            deltabase = bases[current]
        # Since ``node`` begins a valid chain, reset/memoize its base to nullid
        # so we don't traverse it again.
        bases[node] = sha1nodeconstants.nullid
    return failures


def dumpdeltachain(ui, deltachain, **opts):
    hashformatter = hex
    hashlen = 40

    lastfilename = None
    for filename, node, filename, deltabasenode, delta in deltachain:
        if filename != lastfilename:
            ui.write(b"\n%s\n" % filename)
            lastfilename = filename
            ui.write(
                b"%s %s %s %s\n"
                % (
                    b"Node".ljust(hashlen),
                    b"Delta Base".ljust(hashlen),
                    b"Delta SHA1".ljust(hashlen),
                    b"Delta Length".ljust(6),
                )
            )

        ui.write(
            b"%s %s %s %d\n"
            % (
                hashformatter(node),
                hashformatter(deltabasenode),
                hex(hashutil.sha1(delta).digest()),
                len(delta),
            )
        )


def debughistorypack(ui, path):
    if b'.hist' in path:
        path = path[: path.index(b'.hist')]
    hpack = historypack.historypack(path)

    lastfilename = None
    for entry in hpack.iterentries():
        filename, node, p1node, p2node, linknode, copyfrom = entry
        if filename != lastfilename:
            ui.write(b"\n%s\n" % filename)
            ui.write(
                b"%s%s%s%s%s\n"
                % (
                    b"Node".ljust(14),
                    b"P1 Node".ljust(14),
                    b"P2 Node".ljust(14),
                    b"Link Node".ljust(14),
                    b"Copy From",
                )
            )
            lastfilename = filename
        ui.write(
            b"%s %s %s %s %s\n"
            % (
                short(node),
                short(p1node),
                short(p2node),
                short(linknode),
                copyfrom,
            )
        )


def debugwaitonrepack(repo):
    with lockmod.lock(repack.repacklockvfs(repo), b"repacklock", timeout=-1):
        return


def debugwaitonprefetch(repo):
    with repo._lock(
        repo.svfs,
        b"prefetchlock",
        True,
        None,
        None,
        _(b'prefetching in %s') % repo.origroot,
    ):
        pass
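The only change in the hunk above is the -> filelog.FileLog return annotation on buildtemprevlog(). As a hedged sketch of why one annotation pays off downstream (RevlogLike and pick_base are hypothetical illustrations, not Mercurial code): once the checker knows what r is, the attribute accesses debugindex() performs through r.get_revlog() can be verified rather than silently typed as Any.

from typing import Protocol


class RevlogLike(Protocol):
    # minimal protocol standing in for the slice of the revlog API used here
    def deltaparent(self, rev: int) -> int: ...

    def chainbase(self, rev: int) -> int: ...


def pick_base(rl: RevlogLike, rev: int, generaldelta: bool) -> int:
    # mirrors the generaldelta branch in debugindex(); with a known type
    # for rl, both method calls are checked against the protocol
    return rl.deltaparent(rev) if generaldelta else rl.chainbase(rev)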
@@ -1,472 +1,476 b''
# remotefilelog.py - filelog implementation where filelog history is stored
# remotely
#
# Copyright 2013 Facebook, Inc.
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

import collections

+from typing import (
+    Iterator,
+)
+
from mercurial.node import bin
from mercurial.i18n import _
from mercurial import (
    ancestor,
    error,
    mdiff,
    revlog,
)
from mercurial.utils import storageutil
from mercurial.revlogutils import flagutil

from . import (
    constants,
    shallowutil,
)


class remotefilelognodemap:
    def __init__(self, filename, store):
        self._filename = filename
        self._store = store

    def __contains__(self, node):
        missing = self._store.getmissing([(self._filename, node)])
        return not bool(missing)

    def __get__(self, node):
        if node not in self:
            raise KeyError(node)
        return node


class remotefilelog:
    _flagserrorclass = error.RevlogError

    def __init__(self, opener, path, repo):
        self.opener = opener
        self.filename = path
        self.repo = repo
        self.nodemap = remotefilelognodemap(self.filename, repo.contentstore)

        self.version = 1

        self._flagprocessors = dict(flagutil.flagprocessors)

    def read(self, node):
        """returns the file contents at this node"""
        t = self.revision(node)
        if not t.startswith(b'\1\n'):
            return t
        s = t.index(b'\1\n', 2)
        return t[s + 2 :]

    def add(self, text, meta, transaction, linknode, p1=None, p2=None):
        # hash with the metadata, like in vanilla filelogs
        hashtext = shallowutil.createrevlogtext(
            text, meta.get(b'copy'), meta.get(b'copyrev')
        )
        node = storageutil.hashrevisionsha1(hashtext, p1, p2)
        return self.addrevision(
            hashtext, transaction, linknode, p1, p2, node=node
        )

    def _createfileblob(self, text, meta, flags, p1, p2, node, linknode):
        # text passed to "_createfileblob" does not include filelog metadata
        header = shallowutil.buildfileblobheader(len(text), flags)
        data = b"%s\0%s" % (header, text)

        realp1 = p1
        copyfrom = b""
        if meta and b'copy' in meta:
            copyfrom = meta[b'copy']
            realp1 = bin(meta[b'copyrev'])

        data += b"%s%s%s%s%s\0" % (node, realp1, p2, linknode, copyfrom)

        visited = set()

        pancestors = {}
        queue = []
        if realp1 != self.repo.nullid:
            p1flog = self
            if copyfrom:
                p1flog = remotefilelog(self.opener, copyfrom, self.repo)

            pancestors.update(p1flog.ancestormap(realp1))
            queue.append(realp1)
            visited.add(realp1)
        if p2 != self.repo.nullid:
            pancestors.update(self.ancestormap(p2))
            queue.append(p2)
            visited.add(p2)

        ancestortext = b""

        # add the ancestors in topological order
        while queue:
            c = queue.pop(0)
            pa1, pa2, ancestorlinknode, pacopyfrom = pancestors[c]

            pacopyfrom = pacopyfrom or b''
            ancestortext += b"%s%s%s%s%s\0" % (
                c,
                pa1,
                pa2,
                ancestorlinknode,
                pacopyfrom,
            )

            if pa1 != self.repo.nullid and pa1 not in visited:
                queue.append(pa1)
                visited.add(pa1)
            if pa2 != self.repo.nullid and pa2 not in visited:
                queue.append(pa2)
                visited.add(pa2)

        data += ancestortext

        return data

    def addrevision(
        self,
        text,
        transaction,
        linknode,
        p1,
        p2,
        cachedelta=None,
        node=None,
        flags=revlog.REVIDX_DEFAULT_FLAGS,
        sidedata=None,
    ):
        # text passed to "addrevision" includes hg filelog metadata header
        if node is None:
            node = storageutil.hashrevisionsha1(text, p1, p2)

        meta, metaoffset = storageutil.parsemeta(text)
        rawtext, validatehash = flagutil.processflagswrite(
            self,
            text,
            flags,
        )
        return self.addrawrevision(
            rawtext,
            transaction,
            linknode,
            p1,
            p2,
            node,
            flags,
            cachedelta,
            _metatuple=(meta, metaoffset),
        )

    def addrawrevision(
        self,
        rawtext,
        transaction,
        linknode,
        p1,
        p2,
        node,
        flags,
        cachedelta=None,
        _metatuple=None,
    ):
        if _metatuple:
            # _metatuple: used by "addrevision" internally by remotefilelog
            # meta was parsed confidently
            meta, metaoffset = _metatuple
        else:
            # not from self.addrevision, but something else (repo._filecommit)
            # calls addrawrevision directly. remotefilelog needs to get and
            # strip filelog metadata.
            # we don't have confidence about whether rawtext contains filelog
            # metadata or not (flag processor could replace it), so we just
            # parse it as best-effort.
            # in LFS (flags != 0)'s case, the best way is to call LFS code to
            # get the meta information, instead of storageutil.parsemeta.
            meta, metaoffset = storageutil.parsemeta(rawtext)
        if flags != 0:
            # when flags != 0, be conservative and do not mangle rawtext, since
            # a read flag processor expects the text not being mangled at all.
            metaoffset = 0
        if metaoffset:
            # remotefilelog fileblob stores copy metadata in its ancestortext,
            # not its main blob. so we need to remove filelog metadata
            # (containing copy information) from text.
            blobtext = rawtext[metaoffset:]
        else:
            blobtext = rawtext
        data = self._createfileblob(
            blobtext, meta, flags, p1, p2, node, linknode
        )
        self.repo.contentstore.addremotefilelognode(self.filename, node, data)

        return node

    def renamed(self, node):
        ancestors = self.repo.metadatastore.getancestors(self.filename, node)
        p1, p2, linknode, copyfrom = ancestors[node]
        if copyfrom:
            return (copyfrom, p1)

        return False

    def size(self, node):
        """return the size of a given revision"""
        return len(self.read(node))

    rawsize = size

    def cmp(self, node, text):
        """compare text with a given file revision

        returns True if text is different than what is stored.
        """

        if node == self.repo.nullid:
            return True

        nodetext = self.read(node)
        return nodetext != text

    def __nonzero__(self):
        return True

    __bool__ = __nonzero__

    def __len__(self):
        if self.filename in (b'.hgtags', b'.hgsub', b'.hgsubstate'):
            # Global tag and subrepository support require access to the
            # file history for various performance sensitive operations.
            # excludepattern should be used for repositories depending on
            # those features to fallback to regular filelog.
            return 0

        raise RuntimeError(b"len not supported")

    def heads(self):
        # Fake heads of the filelog to satisfy hgweb.
        return []

    def empty(self):
        return False

    def flags(self, node):
        if isinstance(node, int):
            raise error.ProgrammingError(
                b'remotefilelog does not accept integer rev for flags'
            )
        store = self.repo.contentstore
        return store.getmeta(self.filename, node).get(constants.METAKEYFLAG, 0)

    def parents(self, node):
        if node == self.repo.nullid:
            return self.repo.nullid, self.repo.nullid

        ancestormap = self.repo.metadatastore.getancestors(self.filename, node)
        p1, p2, linknode, copyfrom = ancestormap[node]
        if copyfrom:
            p1 = self.repo.nullid

        return p1, p2

    def parentrevs(self, rev):
        # TODO(augie): this is a node and should be a rev, but for now
        # nothing in core seems to actually break.
        return self.parents(rev)

    def linknode(self, node):
        ancestormap = self.repo.metadatastore.getancestors(self.filename, node)
        p1, p2, linknode, copyfrom = ancestormap[node]
        return linknode

    def linkrev(self, node):
        return self.repo.unfiltered().changelog.rev(self.linknode(node))

    def emitrevisions(
        self,
        nodes,
        nodesorder=None,
        revisiondata=False,
        assumehaveparentrevisions=False,
        deltaprevious=False,
        deltamode=None,
        sidedata_helpers=None,
        debug_info=None,
-    ):
+    ) -> Iterator[revlog.RevLogRevisionDelta]:
        # we don't use any of these parameters here
        del nodesorder, revisiondata, assumehaveparentrevisions, deltaprevious
        del deltamode
        prevnode = None
        for node in nodes:
            p1, p2 = self.parents(node)
            if prevnode is None:
                basenode = prevnode = p1
            if basenode == node:
                basenode = self.repo.nullid
            if basenode != self.repo.nullid:
                revision = None
                delta = self.revdiff(basenode, node)
            else:
                revision = self.rawdata(node)
                delta = None
            yield revlog.revlogrevisiondelta(
                node=node,
                p1node=p1,
                p2node=p2,
                linknode=self.linknode(node),
                basenode=basenode,
                flags=self.flags(node),
                baserevisionsize=None,
                revision=revision,
                delta=delta,
                # Sidedata is not supported yet
                sidedata=None,
                # Protocol flags are not used yet
                protocol_flags=0,
            )

    def revdiff(self, node1, node2):
        return mdiff.textdiff(self.rawdata(node1), self.rawdata(node2))

    def lookup(self, node):
        if len(node) == 40:
            node = bin(node)
        if len(node) != 20:
            raise error.LookupError(
                node, self.filename, _(b'invalid lookup input')
            )

        return node

    def rev(self, node):
        # This is a hack to make TortoiseHG work.
        return node

    def node(self, rev):
        # This is a hack.
        if isinstance(rev, int):
            raise error.ProgrammingError(
                b'remotefilelog does not convert integer rev to node'
            )
        return rev

    def revision(self, node, raw=False):
        """returns the revlog contents at this node.
        this includes the meta data traditionally included in file revlogs.
        this is generally only used for bundling and communicating with vanilla
        hg clients.
        """
        if node == self.repo.nullid:
            return b""
        if len(node) != 20:
            raise error.LookupError(
                node, self.filename, _(b'invalid revision input')
            )
        if (
            node == self.repo.nodeconstants.wdirid
            or node in self.repo.nodeconstants.wdirfilenodeids
        ):
            raise error.WdirUnsupported

        store = self.repo.contentstore
        rawtext = store.get(self.filename, node)
        if raw:
            return rawtext
        flags = store.getmeta(self.filename, node).get(constants.METAKEYFLAG, 0)
        if flags == 0:
            return rawtext
        return flagutil.processflagsread(self, rawtext, flags)[0]

    def rawdata(self, node):
        return self.revision(node, raw=False)

    def ancestormap(self, node):
        return self.repo.metadatastore.getancestors(self.filename, node)

    def ancestor(self, a, b):
        if a == self.repo.nullid or b == self.repo.nullid:
            return self.repo.nullid

        revmap, parentfunc = self._buildrevgraph(a, b)
        nodemap = {v: k for (k, v) in revmap.items()}

        ancs = ancestor.ancestors(parentfunc, revmap[a], revmap[b])
        if ancs:
            # choose a consistent winner when there's a tie
            return min(map(nodemap.__getitem__, ancs))
        return self.repo.nullid

    def commonancestorsheads(self, a, b):
        """calculate all the heads of the common ancestors of nodes a and b"""

        if a == self.repo.nullid or b == self.repo.nullid:
            return self.repo.nullid

        revmap, parentfunc = self._buildrevgraph(a, b)
        nodemap = {v: k for (k, v) in revmap.items()}

        ancs = ancestor.commonancestorsheads(parentfunc, revmap[a], revmap[b])
        return map(nodemap.__getitem__, ancs)

    def _buildrevgraph(self, a, b):
        """Builds a numeric revision graph for the given two nodes.
        Returns a node->rev map and a rev->[revs] parent function.
        """
        amap = self.ancestormap(a)
        bmap = self.ancestormap(b)

        # Union the two maps
        parentsmap = collections.defaultdict(list)
        allparents = set()
        for mapping in (amap, bmap):
            for node, pdata in mapping.items():
                parents = parentsmap[node]
                p1, p2, linknode, copyfrom = pdata
                # Don't follow renames (copyfrom).
                # remotefilectx.ancestor does that.
                if p1 != self.repo.nullid and not copyfrom:
                    parents.append(p1)
                    allparents.add(p1)
                if p2 != self.repo.nullid:
                    parents.append(p2)
                    allparents.add(p2)

        # Breadth first traversal to build linkrev graph
        parentrevs = collections.defaultdict(list)
        revmap = {}
        queue = collections.deque(
            ((None, n) for n in parentsmap if n not in allparents)
        )
        while queue:
            prevrev, current = queue.pop()
            if current in revmap:
                if prevrev:
                    parentrevs[prevrev].append(revmap[current])
                continue

            # Assign linkrevs in reverse order, so start at
            # len(parentsmap) and work backwards.
            currentrev = len(parentsmap) - len(revmap) - 1
            revmap[current] = currentrev

            if prevrev:
                parentrevs[prevrev].append(currentrev)

            for parent in parentsmap.get(current):
                queue.appendleft((currentrev, parent))

        return revmap, parentrevs.__getitem__

    def strip(self, minlink, transaction):
        pass

    # misc unused things
    def files(self):
        return []

    def checksize(self):
        return 0, 0
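The remotefilelog hunk above annotates the emitrevisions() generator as returning an Iterator. A self-contained sketch of that convention, assuming a made-up Delta class (not Mercurial's revlogrevisiondelta): a generator function is annotated with the iterator type it produces, so callers iterate with the element type checked.

from dataclasses import dataclass
from typing import Iterator, List


@dataclass
class Delta:
    node: bytes
    basenode: bytes


def emit(nodes: List[bytes]) -> Iterator[Delta]:
    # yield one delta per node, each based on the previous node, loosely
    # echoing how emitrevisions() chains revisions together
    prev = b"\0" * 20
    for node in nodes:
        yield Delta(node=node, basenode=prev)
        prev = node


for d in emit([b"a" * 20, b"b" * 20]):
    assert isinstance(d, Delta)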
@@ -1,319 +1,328 b''
# filelog.py - file history class for mercurial
#
# Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.


import typing

+from typing import (
+    Iterable,
+    Iterator,
+)
+
from .i18n import _
from .node import nullrev
from . import (
    error,
    revlog,
)
from .interfaces import (
    repository,
    util as interfaceutil,
)
from .utils import storageutil
from .revlogutils import (
    constants as revlog_constants,
    rewrite,
)


class FileLog:
+    _revlog: revlog.revlog
+    nullid: bytes
+    _fix_issue6528: bool
+
    def __init__(self, opener, path, try_split=False):
        self._revlog = revlog.revlog(
            opener,
            # XXX should use the unencoded path
            target=(revlog_constants.KIND_FILELOG, path),
            radix=b'/'.join((b'data', path)),
            censorable=True,
            canonical_parent_order=False,  # see comment in revlog.py
            try_split=try_split,
        )
        # Full name of the user visible file, relative to the repository root.
        # Used by LFS.
        self._revlog.filename = path
        self.nullid = self._revlog.nullid
        opts = opener.options
        self._fix_issue6528 = opts.get(b'issue6528.fix-incoming', True)

-    def get_revlog(self):
+    def get_revlog(self) -> revlog.revlog:
        """return an actual revlog instance if any

        This exist because a lot of code leverage the fact the underlying
        storage is a revlog for optimization, so giving simple way to access
        the revlog instance helps such code.
        """
        return self._revlog

-    def __len__(self):
+    def __len__(self) -> int:
        return len(self._revlog)

-    def __iter__(self):
+    def __iter__(self) -> Iterator[int]:
        return self._revlog.__iter__()

    def hasnode(self, node):
        if node in (self.nullid, nullrev):
            return False

        try:
            self._revlog.rev(node)
            return True
        except (TypeError, ValueError, IndexError, error.LookupError):
            return False

    def revs(self, start=0, stop=None):
        return self._revlog.revs(start=start, stop=stop)

    def parents(self, node):
        return self._revlog.parents(node)

    def parentrevs(self, rev):
        return self._revlog.parentrevs(rev)

    def rev(self, node):
        return self._revlog.rev(node)

    def node(self, rev):
        return self._revlog.node(rev)

    def lookup(self, node):
        return storageutil.fileidlookup(
            self._revlog, node, self._revlog.display_id
        )

    def linkrev(self, rev):
        return self._revlog.linkrev(rev)

    def commonancestorsheads(self, node1, node2):
        return self._revlog.commonancestorsheads(node1, node2)

    # Used by dagop.blockdescendants().
    def descendants(self, revs):
        return self._revlog.descendants(revs)

    def heads(self, start=None, stop=None):
        return self._revlog.heads(start, stop)

    # Used by hgweb, children extension.
    def children(self, node):
        return self._revlog.children(node)

    def iscensored(self, rev):
        return self._revlog.iscensored(rev)

    def revision(self, node):
        return self._revlog.revision(node)

    def rawdata(self, node):
        return self._revlog.rawdata(node)

    def emitrevisions(
        self,
        nodes,
        nodesorder=None,
        revisiondata=False,
        assumehaveparentrevisions=False,
        deltamode=repository.CG_DELTAMODE_STD,
        sidedata_helpers=None,
        debug_info=None,
    ):
        return self._revlog.emitrevisions(
            nodes,
            nodesorder=nodesorder,
            revisiondata=revisiondata,
            assumehaveparentrevisions=assumehaveparentrevisions,
            deltamode=deltamode,
            sidedata_helpers=sidedata_helpers,
            debug_info=debug_info,
        )

    def addrevision(
        self,
        revisiondata,
        transaction,
        linkrev,
        p1,
        p2,
        node=None,
        flags=revlog.REVIDX_DEFAULT_FLAGS,
        cachedelta=None,
    ):
        return self._revlog.addrevision(
            revisiondata,
            transaction,
            linkrev,
            p1,
            p2,
            node=node,
            flags=flags,
            cachedelta=cachedelta,
        )

    def addgroup(
        self,
        deltas,
        linkmapper,
        transaction,
        addrevisioncb=None,
        duplicaterevisioncb=None,
        maybemissingparents=False,
        debug_info=None,
        delta_base_reuse_policy=None,
    ):
        if maybemissingparents:
            raise error.Abort(
                _(
                    b'revlog storage does not support missing '
                    b'parents write mode'
                )
            )

        with self._revlog._writing(transaction):
            if self._fix_issue6528:
                deltas = rewrite.filter_delta_issue6528(self._revlog, deltas)

            return self._revlog.addgroup(
                deltas,
                linkmapper,
                transaction,
                addrevisioncb=addrevisioncb,
                duplicaterevisioncb=duplicaterevisioncb,
                debug_info=debug_info,
                delta_base_reuse_policy=delta_base_reuse_policy,
            )

    def getstrippoint(self, minlink):
        return self._revlog.getstrippoint(minlink)

    def strip(self, minlink, transaction):
        return self._revlog.strip(minlink, transaction)

    def censorrevision(self, tr, node, tombstone=b''):
        return self._revlog.censorrevision(tr, node, tombstone=tombstone)

    def files(self):
        return self._revlog.files()

    def read(self, node):
        return storageutil.filtermetadata(self.revision(node))

    def add(self, text, meta, transaction, link, p1=None, p2=None):
        if meta or text.startswith(b'\1\n'):
            text = storageutil.packmeta(meta, text)
        rev = self.addrevision(text, transaction, link, p1, p2)
        return self.node(rev)

    def renamed(self, node):
        return storageutil.filerevisioncopied(self, node)

    def size(self, rev):
        """return the size of a given revision"""

        # for revisions with renames, we have to go the slow way
        node = self.node(rev)
        if self.iscensored(rev):
            return 0
        if self.renamed(node):
            return len(self.read(node))

        # XXX if self.read(node).startswith("\1\n"), this returns (size+4)
        # XXX See also basefilectx.cmp.
        return self._revlog.size(rev)

    def cmp(self, node, text):
239 def cmp(self, node, text):
231 """compare text with a given file revision
240 """compare text with a given file revision
232
241
233 returns True if text is different from what is stored.
242 returns True if text is different from what is stored.
234 """
243 """
235 return not storageutil.filedataequivalent(self, node, text)
244 return not storageutil.filedataequivalent(self, node, text)
236
245
237 def verifyintegrity(self, state):
246 def verifyintegrity(self, state) -> Iterable[revlog.RevLogProblem]:
238 return self._revlog.verifyintegrity(state)
247 return self._revlog.verifyintegrity(state)
239
248
240 def storageinfo(
249 def storageinfo(
241 self,
250 self,
242 exclusivefiles=False,
251 exclusivefiles=False,
243 sharedfiles=False,
252 sharedfiles=False,
244 revisionscount=False,
253 revisionscount=False,
245 trackedsize=False,
254 trackedsize=False,
246 storedsize=False,
255 storedsize=False,
247 ):
256 ):
248 return self._revlog.storageinfo(
257 return self._revlog.storageinfo(
249 exclusivefiles=exclusivefiles,
258 exclusivefiles=exclusivefiles,
250 sharedfiles=sharedfiles,
259 sharedfiles=sharedfiles,
251 revisionscount=revisionscount,
260 revisionscount=revisionscount,
252 trackedsize=trackedsize,
261 trackedsize=trackedsize,
253 storedsize=storedsize,
262 storedsize=storedsize,
254 )
263 )
255
264
256 # Used by repo upgrade.
265 # Used by repo upgrade.
257 def clone(self, tr, destrevlog, **kwargs):
266 def clone(self, tr, destrevlog, **kwargs):
258 if not isinstance(destrevlog, filelog):
267 if not isinstance(destrevlog, filelog):
259 msg = b'expected filelog to clone(), not %r'
268 msg = b'expected filelog to clone(), not %r'
260 msg %= destrevlog
269 msg %= destrevlog
261 raise error.ProgrammingError(msg)
270 raise error.ProgrammingError(msg)
262
271
263 return self._revlog.clone(tr, destrevlog._revlog, **kwargs)
272 return self._revlog.clone(tr, destrevlog._revlog, **kwargs)
264
273
265
274
266 filelog = interfaceutil.implementer(repository.ifilestorage)(FileLog)
275 filelog = interfaceutil.implementer(repository.ifilestorage)(FileLog)
267
276
268 if typing.TYPE_CHECKING:
277 if typing.TYPE_CHECKING:
269 filelog = FileLog
278 filelog = FileLog
270
279
271
280
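The two assignments above split runtime from type checking: at runtime the class is wrapped for interface registration, while under `typing.TYPE_CHECKING` the name is re-pointed at the concrete class so pytype sees the real method signatures. A minimal, self-contained sketch of the pattern (hypothetical names; `implementer` here is a no-op stand-in for `interfaceutil.implementer`):

    import typing


    def implementer(cls):
        # Stand-in for interfaceutil.implementer(iface): the real helper
        # registers the interface on the class; behavior is unchanged.
        return cls


    class _Impl:
        def greet(self) -> str:
            return "hi"


    impl = implementer(_Impl)

    if typing.TYPE_CHECKING:
        # Checkers analyze the unwrapped class, keeping signatures visible.
        impl = _Impl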
272 class narrowfilelog(filelog):
281 class narrowfilelog(filelog):
273 """Filelog variation to be used with narrow stores."""
282 """Filelog variation to be used with narrow stores."""
274
283
275 def __init__(self, opener, path, narrowmatch, try_split=False):
284 def __init__(self, opener, path, narrowmatch, try_split=False):
276 super(narrowfilelog, self).__init__(opener, path, try_split=try_split)
285 super(narrowfilelog, self).__init__(opener, path, try_split=try_split)
277 self._narrowmatch = narrowmatch
286 self._narrowmatch = narrowmatch
278
287
279 def renamed(self, node):
288 def renamed(self, node):
280 res = super(narrowfilelog, self).renamed(node)
289 res = super(narrowfilelog, self).renamed(node)
281
290
282 # Renames that come from outside the narrowspec are problematic
291 # Renames that come from outside the narrowspec are problematic
283 # because we may lack the base text for the rename. This can result
292 # because we may lack the base text for the rename. This can result
284 # in code attempting to walk the ancestry or compute a diff
293 # in code attempting to walk the ancestry or compute a diff
285 # encountering a missing revision. We address this by silently
294 # encountering a missing revision. We address this by silently
286 # removing rename metadata if the source file is outside the
295 # removing rename metadata if the source file is outside the
287 # narrow spec.
296 # narrow spec.
288 #
297 #
289 # A better solution would be to see if the base revision is available,
298 # A better solution would be to see if the base revision is available,
290 # rather than assuming it isn't.
299 # rather than assuming it isn't.
291 #
300 #
292 # An even better solution would be to teach all consumers of rename
301 # An even better solution would be to teach all consumers of rename
293 # metadata that the base revision may not be available.
302 # metadata that the base revision may not be available.
294 #
303 #
295 # TODO consider better ways of doing this.
304 # TODO consider better ways of doing this.
296 if res and not self._narrowmatch(res[0]):
305 if res and not self._narrowmatch(res[0]):
297 return None
306 return None
298
307
299 return res
308 return res
300
309
301 def size(self, rev):
310 def size(self, rev):
302 # Because we have a custom renamed() that may lie, we need to call
311 # Because we have a custom renamed() that may lie, we need to call
303 # the base renamed() to report accurate results.
312 # the base renamed() to report accurate results.
304 node = self.node(rev)
313 node = self.node(rev)
305 if super(narrowfilelog, self).renamed(node):
314 if super(narrowfilelog, self).renamed(node):
306 return len(self.read(node))
315 return len(self.read(node))
307 else:
316 else:
308 return super(narrowfilelog, self).size(rev)
317 return super(narrowfilelog, self).size(rev)
309
318
310 def cmp(self, node, text):
319 def cmp(self, node, text):
311 # We don't call `super` because narrow parents can be buggy in case of an
320 # We don't call `super` because narrow parents can be buggy in case of an
312 # ambiguous dirstate. Always take the slow path until there is a better
321 # ambiguous dirstate. Always take the slow path until there is a better
313 # fix, see issue6150.
322 # fix, see issue6150.
314
323
315 # Censored files compare against the empty file.
324 # Censored files compare against the empty file.
316 if self.iscensored(self.rev(node)):
325 if self.iscensored(self.rev(node)):
317 return text != b''
326 return text != b''
318
327
319 return self.read(node) != text
328 return self.read(node) != text
@@ -1,2780 +1,2797 b''
1 # manifest.py - manifest revision class for mercurial
1 # manifest.py - manifest revision class for mercurial
2 #
2 #
3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8
8
9 import heapq
9 import heapq
10 import itertools
10 import itertools
11 import struct
11 import struct
12 import typing
12 import typing
13 import weakref
13 import weakref
14
14
15 from typing import (
15 from typing import (
16 ByteString,
16 ByteString,
17 Callable,
17 Callable,
18 Collection,
18 Collection,
19 Dict,
19 Dict,
20 Iterable,
20 Iterable,
21 Iterator,
21 Iterator,
22 List,
22 List,
23 Optional,
23 Optional,
24 Set,
24 Set,
25 Tuple,
25 Tuple,
26 Union,
26 Union,
27 cast,
27 cast,
28 )
28 )
29
29
30 from .i18n import _
30 from .i18n import _
31 from .node import (
31 from .node import (
32 bin,
32 bin,
33 hex,
33 hex,
34 nullrev,
34 nullrev,
35 )
35 )
36 from . import (
36 from . import (
37 encoding,
37 encoding,
38 error,
38 error,
39 match as matchmod,
39 match as matchmod,
40 mdiff,
40 mdiff,
41 pathutil,
41 pathutil,
42 policy,
42 policy,
43 pycompat,
43 pycompat,
44 revlog,
44 revlog,
45 util,
45 util,
46 )
46 )
47 from .interfaces import (
47 from .interfaces import (
48 repository,
48 repository,
49 util as interfaceutil,
49 util as interfaceutil,
50 )
50 )
51 from .revlogutils import (
51 from .revlogutils import (
52 constants as revlog_constants,
52 constants as revlog_constants,
53 )
53 )
54
54
55 parsers = policy.importmod('parsers')
55 parsers = policy.importmod('parsers')
56 propertycache = util.propertycache
56 propertycache = util.propertycache
57
57
58 # Allow tests to more easily test the alternate path in manifestdict.fastdelta()
58 # Allow tests to more easily test the alternate path in manifestdict.fastdelta()
59 FASTDELTA_TEXTDIFF_THRESHOLD = 1000
59 FASTDELTA_TEXTDIFF_THRESHOLD = 1000
60
60
61
61
62 def _parse(nodelen, data: bytes):
62 def _parse(nodelen, data: bytes):
63 # This method does a little bit of excessive-looking
63 # This method does a little bit of excessive-looking
64 # precondition checking. This is so that the behavior of this
64 # precondition checking. This is so that the behavior of this
65 # class exactly matches its C counterpart to try and help
65 # class exactly matches its C counterpart to try and help
66 # prevent surprise breakage for anyone that develops against
66 # prevent surprise breakage for anyone that develops against
67 # the pure version.
67 # the pure version.
68 if data and data[-1:] != b'\n':
68 if data and data[-1:] != b'\n':
69 raise ValueError(b'Manifest did not end in a newline.')
69 raise ValueError(b'Manifest did not end in a newline.')
70 prev = None
70 prev = None
71 for l in data.splitlines():
71 for l in data.splitlines():
72 if prev is not None and prev > l:
72 if prev is not None and prev > l:
73 raise ValueError(b'Manifest lines not in sorted order.')
73 raise ValueError(b'Manifest lines not in sorted order.')
74 prev = l
74 prev = l
75 f, n = l.split(b'\0')
75 f, n = l.split(b'\0')
76 nl = len(n)
76 nl = len(n)
77 flags = n[-1:]
77 flags = n[-1:]
78 if flags in _manifestflags:
78 if flags in _manifestflags:
79 n = n[:-1]
79 n = n[:-1]
80 nl -= 1
80 nl -= 1
81 else:
81 else:
82 flags = b''
82 flags = b''
83 if nl != 2 * nodelen:
83 if nl != 2 * nodelen:
84 raise ValueError(b'Invalid manifest line')
84 raise ValueError(b'Invalid manifest line')
85
85
86 yield f, bin(n), flags
86 yield f, bin(n), flags
87
87
88
88
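For reference, each flat manifest line is `<path>\0<2*nodelen hex digits><optional flag>\n`, sorted by path. A minimal sketch of decoding one line by hand, assuming sha1-length nodes (this mirrors what `_parse` does per line; the names are illustrative):

    from binascii import unhexlify

    nodelen = 20  # sha1; 32 for the newer hash
    line = b"foo/bar.txt\x00" + b"ab" * nodelen + b"x\n"  # trailing 'x' flag

    f, n = line.rstrip(b"\n").split(b"\x00")
    flags = n[-1:] if n[-1:] in {b"l", b"t", b"x"} else b""
    if flags:
        n = n[:-1]
    assert len(n) == 2 * nodelen
    node = unhexlify(n)  # equivalent to mercurial.node.bin(n)
    assert (f, flags) == (b"foo/bar.txt", b"x")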
89 def _text(it):
89 def _text(it):
90 files = []
90 files = []
91 lines = []
91 lines = []
92 for f, n, fl in it:
92 for f, n, fl in it:
93 files.append(f)
93 files.append(f)
94 # if this is changed to support newlines in filenames,
94 # if this is changed to support newlines in filenames,
95 # be sure to check the templates/ dir again (especially *-raw.tmpl)
95 # be sure to check the templates/ dir again (especially *-raw.tmpl)
96 lines.append(b"%s\0%s%s\n" % (f, hex(n), fl))
96 lines.append(b"%s\0%s%s\n" % (f, hex(n), fl))
97
97
98 _checkforbidden(files)
98 _checkforbidden(files)
99 return b''.join(lines)
99 return b''.join(lines)
100
100
101
101
102 class lazymanifestiter:
102 class lazymanifestiter:
103 def __init__(self, lm: '_LazyManifest') -> None:
103 def __init__(self, lm: '_LazyManifest') -> None:
104 self.pos = 0
104 self.pos = 0
105 self.lm = lm
105 self.lm = lm
106
106
107 def __iter__(self) -> 'lazymanifestiter':
107 def __iter__(self) -> 'lazymanifestiter':
108 return self
108 return self
109
109
110 def next(self) -> bytes:
110 def next(self) -> bytes:
111 try:
111 try:
112 data, pos = self.lm._get(self.pos)
112 data, pos = self.lm._get(self.pos)
113 except IndexError:
113 except IndexError:
114 raise StopIteration
114 raise StopIteration
115 if pos == -1:
115 if pos == -1:
116 assert isinstance(data, tuple)
116 assert isinstance(data, tuple)
117 self.pos += 1
117 self.pos += 1
118 return data[0]
118 return data[0]
119 assert isinstance(data, bytes)
119 assert isinstance(data, bytes)
120 self.pos += 1
120 self.pos += 1
121 zeropos = data.find(b'\x00', pos)
121 zeropos = data.find(b'\x00', pos)
122 return data[pos:zeropos]
122 return data[pos:zeropos]
123
123
124 __next__ = next
124 __next__ = next
125
125
126
126
127 class lazymanifestiterentries:
127 class lazymanifestiterentries:
128 def __init__(self, lm: '_LazyManifest') -> None:
128 def __init__(self, lm: '_LazyManifest') -> None:
129 self.lm = lm
129 self.lm = lm
130 self.pos = 0
130 self.pos = 0
131
131
132 def __iter__(self) -> 'lazymanifestiterentries':
132 def __iter__(self) -> 'lazymanifestiterentries':
133 return self
133 return self
134
134
135 def next(self) -> Tuple[bytes, bytes, bytes]:
135 def next(self) -> Tuple[bytes, bytes, bytes]:
136 try:
136 try:
137 data, pos = self.lm._get(self.pos)
137 data, pos = self.lm._get(self.pos)
138 except IndexError:
138 except IndexError:
139 raise StopIteration
139 raise StopIteration
140 if pos == -1:
140 if pos == -1:
141 assert isinstance(data, tuple)
141 assert isinstance(data, tuple)
142 self.pos += 1
142 self.pos += 1
143 return data
143 return data
144 assert isinstance(data, bytes)
144 assert isinstance(data, bytes)
145 zeropos = data.find(b'\x00', pos)
145 zeropos = data.find(b'\x00', pos)
146 nlpos = data.find(b'\n', pos)
146 nlpos = data.find(b'\n', pos)
147 if zeropos == -1 or nlpos == -1 or nlpos < zeropos:
147 if zeropos == -1 or nlpos == -1 or nlpos < zeropos:
148 raise error.StorageError(b'Invalid manifest line')
148 raise error.StorageError(b'Invalid manifest line')
149 flags = data[nlpos - 1 : nlpos]
149 flags = data[nlpos - 1 : nlpos]
150 if flags in _manifestflags:
150 if flags in _manifestflags:
151 hlen = nlpos - zeropos - 2
151 hlen = nlpos - zeropos - 2
152 else:
152 else:
153 hlen = nlpos - zeropos - 1
153 hlen = nlpos - zeropos - 1
154 flags = b''
154 flags = b''
155 if hlen != 2 * self.lm._nodelen:
155 if hlen != 2 * self.lm._nodelen:
156 raise error.StorageError(b'Invalid manifest line')
156 raise error.StorageError(b'Invalid manifest line')
157 hashval = unhexlify(
157 hashval = unhexlify(
158 data, self.lm.extrainfo[self.pos], zeropos + 1, hlen
158 data, self.lm.extrainfo[self.pos], zeropos + 1, hlen
159 )
159 )
160 self.pos += 1
160 self.pos += 1
161 return (data[pos:zeropos], hashval, flags)
161 return (data[pos:zeropos], hashval, flags)
162
162
163 __next__ = next
163 __next__ = next
164
164
165
165
166 def unhexlify(data: bytes, extra: int, pos, length: int):
166 def unhexlify(data: bytes, extra: int, pos, length: int):
167 s = bin(data[pos : pos + length])
167 s = bin(data[pos : pos + length])
168 if extra:
168 if extra:
169 s += bytes([extra & 0xFF])
169 s += bytes([extra & 0xFF])
170 return s
170 return s
171
171
172
172
173 def _cmp(a, b):
173 def _cmp(a, b):
174 return (a > b) - (a < b)
174 return (a > b) - (a < b)
175
175
176
176
177 _manifestflags = {b'', b'l', b't', b'x'}
177 _manifestflags = {b'', b'l', b't', b'x'}
178
178
179
179
180 class _LazyManifest:
180 class _LazyManifest:
181 """A pure python manifest backed by a byte string. It is supplimented with
181 """A pure python manifest backed by a byte string. It is supplimented with
182 internal lists as it is modified, until it is compacted back to a pure byte
182 internal lists as it is modified, until it is compacted back to a pure byte
183 string.
183 string.
184
184
185 ``data`` is the initial manifest data.
185 ``data`` is the initial manifest data.
186
186
187 ``positions`` is a list of offsets, one per manifest entry. Positive
187 ``positions`` is a list of offsets, one per manifest entry. Positive
188 values are offsets into ``data``, negative values are offsets into the
188 values are offsets into ``data``, negative values are offsets into the
189 ``extradata`` list. When an entry is removed, its entry is dropped from
189 ``extradata`` list. When an entry is removed, its entry is dropped from
190 ``positions``. The values are encoded such that when walking the list and
190 ``positions``. The values are encoded such that when walking the list and
191 indexing into ``data`` or ``extradata`` as appropriate, the entries are
191 indexing into ``data`` or ``extradata`` as appropriate, the entries are
192 sorted by filename.
192 sorted by filename.
193
193
194 ``extradata`` is a list of (key, hash, flags) for entries that were added or
194 ``extradata`` is a list of (key, hash, flags) for entries that were added or
195 modified since the manifest was created or compacted.
195 modified since the manifest was created or compacted.
196 """
196 """
197
197
198 def __init__(
198 def __init__(
199 self,
199 self,
200 nodelen: int,
200 nodelen: int,
201 data: bytes,
201 data: bytes,
202 positions=None,
202 positions=None,
203 extrainfo=None,
203 extrainfo=None,
204 extradata=None,
204 extradata=None,
205 hasremovals: bool = False,
205 hasremovals: bool = False,
206 ):
206 ):
207 self._nodelen = nodelen
207 self._nodelen = nodelen
208 if positions is None:
208 if positions is None:
209 self.positions = self.findlines(data)
209 self.positions = self.findlines(data)
210 self.extrainfo = [0] * len(self.positions)
210 self.extrainfo = [0] * len(self.positions)
211 self.data = data
211 self.data = data
212 self.extradata = []
212 self.extradata = []
213 self.hasremovals = False
213 self.hasremovals = False
214 else:
214 else:
215 self.positions = positions[:]
215 self.positions = positions[:]
216 self.extrainfo = extrainfo[:]
216 self.extrainfo = extrainfo[:]
217 self.extradata = extradata[:]
217 self.extradata = extradata[:]
218 self.data = data
218 self.data = data
219 self.hasremovals = hasremovals
219 self.hasremovals = hasremovals
220
220
221 def findlines(self, data: bytes) -> List[int]:
221 def findlines(self, data: bytes) -> List[int]:
222 if not data:
222 if not data:
223 return []
223 return []
224 pos = data.find(b"\n")
224 pos = data.find(b"\n")
225 if pos == -1 or data[-1:] != b'\n':
225 if pos == -1 or data[-1:] != b'\n':
226 raise ValueError(b"Manifest did not end in a newline.")
226 raise ValueError(b"Manifest did not end in a newline.")
227 positions = [0]
227 positions = [0]
228 prev = data[: data.find(b'\x00')]
228 prev = data[: data.find(b'\x00')]
229 while pos < len(data) - 1 and pos != -1:
229 while pos < len(data) - 1 and pos != -1:
230 positions.append(pos + 1)
230 positions.append(pos + 1)
231 nexts = data[pos + 1 : data.find(b'\x00', pos + 1)]
231 nexts = data[pos + 1 : data.find(b'\x00', pos + 1)]
232 if nexts < prev:
232 if nexts < prev:
233 raise ValueError(b"Manifest lines not in sorted order.")
233 raise ValueError(b"Manifest lines not in sorted order.")
234 prev = nexts
234 prev = nexts
235 pos = data.find(b"\n", pos + 1)
235 pos = data.find(b"\n", pos + 1)
236 return positions
236 return positions
237
237
238 def _get(
238 def _get(
239 self, index: int
239 self, index: int
240 ) -> Tuple[Union[bytes, Tuple[bytes, bytes, bytes]], int]:
240 ) -> Tuple[Union[bytes, Tuple[bytes, bytes, bytes]], int]:
241 # get the position encoded in pos:
241 # get the position encoded in pos:
242 # positive number is an index in 'data'
242 # positive number is an index in 'data'
243 # negative number is in extrapieces
243 # negative number is in extrapieces
244 pos = self.positions[index]
244 pos = self.positions[index]
245 if pos >= 0:
245 if pos >= 0:
246 return self.data, pos
246 return self.data, pos
247 return self.extradata[-pos - 1], -1
247 return self.extradata[-pos - 1], -1
248
248
249 def _getkey(self, pos) -> bytes:
249 def _getkey(self, pos) -> bytes:
250 if pos >= 0:
250 if pos >= 0:
251 return self.data[pos : self.data.find(b'\x00', pos + 1)]
251 return self.data[pos : self.data.find(b'\x00', pos + 1)]
252 return self.extradata[-pos - 1][0]
252 return self.extradata[-pos - 1][0]
253
253
254 def bsearch(self, key: bytes) -> int:
254 def bsearch(self, key: bytes) -> int:
255 first = 0
255 first = 0
256 last = len(self.positions) - 1
256 last = len(self.positions) - 1
257
257
258 while first <= last:
258 while first <= last:
259 midpoint = (first + last) // 2
259 midpoint = (first + last) // 2
260 nextpos = self.positions[midpoint]
260 nextpos = self.positions[midpoint]
261 candidate = self._getkey(nextpos)
261 candidate = self._getkey(nextpos)
262 r = _cmp(key, candidate)
262 r = _cmp(key, candidate)
263 if r == 0:
263 if r == 0:
264 return midpoint
264 return midpoint
265 else:
265 else:
266 if r < 0:
266 if r < 0:
267 last = midpoint - 1
267 last = midpoint - 1
268 else:
268 else:
269 first = midpoint + 1
269 first = midpoint + 1
270 return -1
270 return -1
271
271
272 def bsearch2(self, key: bytes) -> Tuple[int, bool]:
272 def bsearch2(self, key: bytes) -> Tuple[int, bool]:
273 # same as the above, but will always return the position
273 # same as the above, but will always return the position
274 # done for performance reasons
274 # done for performance reasons
275 first = 0
275 first = 0
276 last = len(self.positions) - 1
276 last = len(self.positions) - 1
277
277
278 while first <= last:
278 while first <= last:
279 midpoint = (first + last) // 2
279 midpoint = (first + last) // 2
280 nextpos = self.positions[midpoint]
280 nextpos = self.positions[midpoint]
281 candidate = self._getkey(nextpos)
281 candidate = self._getkey(nextpos)
282 r = _cmp(key, candidate)
282 r = _cmp(key, candidate)
283 if r == 0:
283 if r == 0:
284 return (midpoint, True)
284 return (midpoint, True)
285 else:
285 else:
286 if r < 0:
286 if r < 0:
287 last = midpoint - 1
287 last = midpoint - 1
288 else:
288 else:
289 first = midpoint + 1
289 first = midpoint + 1
290 return (first, False)
290 return (first, False)
291
291
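`bsearch2` differs from `bsearch` only in its miss behavior: it always reports a position, paired with a found flag, so callers can insert at the right spot. The contract, sketched with the stdlib on a plain sorted list (illustrative only):

    import bisect

    keys = [b"a.txt", b"c.txt", b"e.txt"]

    def bsearch2_like(key):
        i = bisect.bisect_left(keys, key)
        return (i, i < len(keys) and keys[i] == key)

    assert bsearch2_like(b"c.txt") == (1, True)   # exact hit
    assert bsearch2_like(b"b.txt") == (1, False)  # insertion point on a miss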
292 def __contains__(self, key: bytes) -> bool:
292 def __contains__(self, key: bytes) -> bool:
293 return self.bsearch(key) != -1
293 return self.bsearch(key) != -1
294
294
295 def __getitem__(self, key: bytes) -> Tuple[bytes, bytes]:
295 def __getitem__(self, key: bytes) -> Tuple[bytes, bytes]:
296 if not isinstance(key, bytes):
296 if not isinstance(key, bytes):
297 raise TypeError(b"getitem: manifest keys must be a bytes.")
297 raise TypeError(b"getitem: manifest keys must be a bytes.")
298 needle = self.bsearch(key)
298 needle = self.bsearch(key)
299 if needle == -1:
299 if needle == -1:
300 raise KeyError
300 raise KeyError
301 data, pos = self._get(needle)
301 data, pos = self._get(needle)
302 if pos == -1:
302 if pos == -1:
303 assert isinstance(data, tuple)
303 assert isinstance(data, tuple)
304 return (data[1], data[2])
304 return (data[1], data[2])
305
305
306 assert isinstance(data, bytes)
306 assert isinstance(data, bytes)
307 zeropos = data.find(b'\x00', pos)
307 zeropos = data.find(b'\x00', pos)
308 nlpos = data.find(b'\n', zeropos)
308 nlpos = data.find(b'\n', zeropos)
309 assert 0 <= needle <= len(self.positions)
309 assert 0 <= needle <= len(self.positions)
310 assert len(self.extrainfo) == len(self.positions)
310 assert len(self.extrainfo) == len(self.positions)
311 if zeropos == -1 or nlpos == -1 or nlpos < zeropos:
311 if zeropos == -1 or nlpos == -1 or nlpos < zeropos:
312 raise error.StorageError(b'Invalid manifest line')
312 raise error.StorageError(b'Invalid manifest line')
313 hlen = nlpos - zeropos - 1
313 hlen = nlpos - zeropos - 1
314 flags = data[nlpos - 1 : nlpos]
314 flags = data[nlpos - 1 : nlpos]
315 if flags in _manifestflags:
315 if flags in _manifestflags:
316 hlen -= 1
316 hlen -= 1
317 else:
317 else:
318 flags = b''
318 flags = b''
319 if hlen != 2 * self._nodelen:
319 if hlen != 2 * self._nodelen:
320 raise error.StorageError(b'Invalid manifest line')
320 raise error.StorageError(b'Invalid manifest line')
321 hashval = unhexlify(data, self.extrainfo[needle], zeropos + 1, hlen)
321 hashval = unhexlify(data, self.extrainfo[needle], zeropos + 1, hlen)
322 return (hashval, flags)
322 return (hashval, flags)
323
323
324 def __delitem__(self, key: bytes) -> None:
324 def __delitem__(self, key: bytes) -> None:
325 needle, found = self.bsearch2(key)
325 needle, found = self.bsearch2(key)
326 if not found:
326 if not found:
327 raise KeyError
327 raise KeyError
328 cur = self.positions[needle]
328 cur = self.positions[needle]
329 self.positions = self.positions[:needle] + self.positions[needle + 1 :]
329 self.positions = self.positions[:needle] + self.positions[needle + 1 :]
330 self.extrainfo = self.extrainfo[:needle] + self.extrainfo[needle + 1 :]
330 self.extrainfo = self.extrainfo[:needle] + self.extrainfo[needle + 1 :]
331 if cur >= 0:
331 if cur >= 0:
332 # This does NOT unsort the list as far as the search functions are
332 # This does NOT unsort the list as far as the search functions are
333 # concerned, as they only examine lines mapped by self.positions.
333 # concerned, as they only examine lines mapped by self.positions.
334 self.data = self.data[:cur] + b'\x00' + self.data[cur + 1 :]
334 self.data = self.data[:cur] + b'\x00' + self.data[cur + 1 :]
335 self.hasremovals = True
335 self.hasremovals = True
336
336
337 def __setitem__(self, key: bytes, value: Tuple[bytes, bytes]):
337 def __setitem__(self, key: bytes, value: Tuple[bytes, bytes]):
338 if not isinstance(key, bytes):
338 if not isinstance(key, bytes):
339 raise TypeError(b"setitem: manifest keys must be a byte string.")
339 raise TypeError(b"setitem: manifest keys must be a byte string.")
340 if not isinstance(value, tuple) or len(value) != 2:
340 if not isinstance(value, tuple) or len(value) != 2:
341 raise TypeError(
341 raise TypeError(
342 b"Manifest values must be a tuple of (node, flags)."
342 b"Manifest values must be a tuple of (node, flags)."
343 )
343 )
344 hashval = value[0]
344 hashval = value[0]
345 if not isinstance(hashval, bytes) or len(hashval) not in (20, 32):
345 if not isinstance(hashval, bytes) or len(hashval) not in (20, 32):
346 raise TypeError(b"node must be a 20-byte or 32-byte byte string")
346 raise TypeError(b"node must be a 20-byte or 32-byte byte string")
347 flags = value[1]
347 flags = value[1]
348 if not isinstance(flags, bytes) or len(flags) > 1:
348 if not isinstance(flags, bytes) or len(flags) > 1:
349 raise TypeError(b"flags must a 0 or 1 byte string, got %r", flags)
349 raise TypeError(b"flags must a 0 or 1 byte string, got %r", flags)
350 needle, found = self.bsearch2(key)
350 needle, found = self.bsearch2(key)
351 if found:
351 if found:
352 # put the item
352 # put the item
353 pos = self.positions[needle]
353 pos = self.positions[needle]
354 if pos < 0:
354 if pos < 0:
355 self.extradata[-pos - 1] = (key, hashval, value[1])
355 self.extradata[-pos - 1] = (key, hashval, value[1])
356 else:
356 else:
357 # just don't bother
357 # just don't bother
358 self.extradata.append((key, hashval, value[1]))
358 self.extradata.append((key, hashval, value[1]))
359 self.positions[needle] = -len(self.extradata)
359 self.positions[needle] = -len(self.extradata)
360 else:
360 else:
361 # not found, put it in with extra positions
361 # not found, put it in with extra positions
362 self.extradata.append((key, hashval, value[1]))
362 self.extradata.append((key, hashval, value[1]))
363 self.positions = (
363 self.positions = (
364 self.positions[:needle]
364 self.positions[:needle]
365 + [-len(self.extradata)]
365 + [-len(self.extradata)]
366 + self.positions[needle:]
366 + self.positions[needle:]
367 )
367 )
368 self.extrainfo = (
368 self.extrainfo = (
369 self.extrainfo[:needle] + [0] + self.extrainfo[needle:]
369 self.extrainfo[:needle] + [0] + self.extrainfo[needle:]
370 )
370 )
371
371
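The sign convention used by `__setitem__` and `_get` is worth spelling out: a non-negative position indexes the byte buffer, while a negative position `p` indexes the added/modified entries as `extradata[-p - 1]`. A small sketch of that encoding in isolation (illustrative values):

    extradata = []
    positions = [0, 12]  # two entries still living in the byte buffer

    extradata.append((b"new.txt", b"\x00" * 20, b""))
    positions.insert(1, -len(extradata))  # first appended entry stored as -1

    pos = positions[1]
    assert pos < 0 and extradata[-pos - 1][0] == b"new.txt"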
372 def copy(self) -> '_LazyManifest':
372 def copy(self) -> '_LazyManifest':
373 # XXX call _compact like in C?
373 # XXX call _compact like in C?
374 return _lazymanifest(
374 return _lazymanifest(
375 self._nodelen,
375 self._nodelen,
376 self.data,
376 self.data,
377 self.positions,
377 self.positions,
378 self.extrainfo,
378 self.extrainfo,
379 self.extradata,
379 self.extradata,
380 self.hasremovals,
380 self.hasremovals,
381 )
381 )
382
382
383 def _compact(self) -> None:
383 def _compact(self) -> None:
384 # hopefully not called TOO often
384 # hopefully not called TOO often
385 if len(self.extradata) == 0 and not self.hasremovals:
385 if len(self.extradata) == 0 and not self.hasremovals:
386 return
386 return
387 l = []
387 l = []
388 i = 0
388 i = 0
389 offset = 0
389 offset = 0
390 self.extrainfo = [0] * len(self.positions)
390 self.extrainfo = [0] * len(self.positions)
391 while i < len(self.positions):
391 while i < len(self.positions):
392 if self.positions[i] >= 0:
392 if self.positions[i] >= 0:
393 cur = self.positions[i]
393 cur = self.positions[i]
394 last_cut = cur
394 last_cut = cur
395
395
396 # Collect all contiguous entries in the buffer at the current
396 # Collect all contiguous entries in the buffer at the current
397 # offset, breaking out only for added/modified items held in
397 # offset, breaking out only for added/modified items held in
398 # extradata, or a deleted line prior to the next position.
398 # extradata, or a deleted line prior to the next position.
399 while True:
399 while True:
400 self.positions[i] = offset
400 self.positions[i] = offset
401 i += 1
401 i += 1
402 if i == len(self.positions) or self.positions[i] < 0:
402 if i == len(self.positions) or self.positions[i] < 0:
403 break
403 break
404
404
405 # A removed file has no positions[] entry, but does have an
405 # A removed file has no positions[] entry, but does have an
406 # overwritten first byte. Break out and find the end of the
406 # overwritten first byte. Break out and find the end of the
407 # current good entry/entries if there is a removed file
407 # current good entry/entries if there is a removed file
408 # before the next position.
408 # before the next position.
409 if (
409 if (
410 self.hasremovals
410 self.hasremovals
411 and self.data.find(b'\n\x00', cur, self.positions[i])
411 and self.data.find(b'\n\x00', cur, self.positions[i])
412 != -1
412 != -1
413 ):
413 ):
414 break
414 break
415
415
416 offset += self.positions[i] - cur
416 offset += self.positions[i] - cur
417 cur = self.positions[i]
417 cur = self.positions[i]
418 end_cut = self.data.find(b'\n', cur)
418 end_cut = self.data.find(b'\n', cur)
419 if end_cut != -1:
419 if end_cut != -1:
420 end_cut += 1
420 end_cut += 1
421 offset += end_cut - cur
421 offset += end_cut - cur
422 l.append(self.data[last_cut:end_cut])
422 l.append(self.data[last_cut:end_cut])
423 else:
423 else:
424 while i < len(self.positions) and self.positions[i] < 0:
424 while i < len(self.positions) and self.positions[i] < 0:
425 cur = self.positions[i]
425 cur = self.positions[i]
426 t = self.extradata[-cur - 1]
426 t = self.extradata[-cur - 1]
427 l.append(self._pack(t))
427 l.append(self._pack(t))
428 self.positions[i] = offset
428 self.positions[i] = offset
429 # Hashes are either 20 bytes (old sha1s) or 32
429 # Hashes are either 20 bytes (old sha1s) or 32
430 # bytes (new non-sha1).
430 # bytes (new non-sha1).
431 hlen = 20
431 hlen = 20
432 if len(t[1]) > 25:
432 if len(t[1]) > 25:
433 hlen = 32
433 hlen = 32
434 if len(t[1]) > hlen:
434 if len(t[1]) > hlen:
435 self.extrainfo[i] = ord(t[1][hlen + 1])
435 self.extrainfo[i] = ord(t[1][hlen + 1])
436 offset += len(l[-1])
436 offset += len(l[-1])
437 i += 1
437 i += 1
438 self.data = b''.join(l)
438 self.data = b''.join(l)
439 self.hasremovals = False
439 self.hasremovals = False
440 self.extradata = []
440 self.extradata = []
441
441
442 def _pack(self, d: Tuple[bytes, bytes, bytes]) -> bytes:
442 def _pack(self, d: Tuple[bytes, bytes, bytes]) -> bytes:
443 n = d[1]
443 n = d[1]
444 assert len(n) in (20, 32)
444 assert len(n) in (20, 32)
445 return d[0] + b'\x00' + hex(n) + d[2] + b'\n'
445 return d[0] + b'\x00' + hex(n) + d[2] + b'\n'
446
446
447 def text(self) -> ByteString:
447 def text(self) -> ByteString:
448 self._compact()
448 self._compact()
449 return self.data
449 return self.data
450
450
451 def diff(
451 def diff(
452 self, m2: '_LazyManifest', clean: bool = False
452 self, m2: '_LazyManifest', clean: bool = False
453 ) -> Dict[
453 ) -> Dict[
454 bytes,
454 bytes,
455 Optional[
455 Optional[
456 Tuple[Tuple[Optional[bytes], bytes], Tuple[Optional[bytes], bytes]]
456 Tuple[Tuple[Optional[bytes], bytes], Tuple[Optional[bytes], bytes]]
457 ],
457 ],
458 ]:
458 ]:
459 '''Finds changes between the current manifest and m2.'''
459 '''Finds changes between the current manifest and m2.'''
460 # XXX think whether efficiency matters here
460 # XXX think whether efficiency matters here
461 diff = {}
461 diff = {}
462
462
463 for fn, e1, flags in self.iterentries():
463 for fn, e1, flags in self.iterentries():
464 if fn not in m2:
464 if fn not in m2:
465 diff[fn] = (e1, flags), (None, b'')
465 diff[fn] = (e1, flags), (None, b'')
466 else:
466 else:
467 e2 = m2[fn]
467 e2 = m2[fn]
468 if (e1, flags) != e2:
468 if (e1, flags) != e2:
469 diff[fn] = (e1, flags), e2
469 diff[fn] = (e1, flags), e2
470 elif clean:
470 elif clean:
471 diff[fn] = None
471 diff[fn] = None
472
472
473 for fn, e2, flags in m2.iterentries():
473 for fn, e2, flags in m2.iterentries():
474 if fn not in self:
474 if fn not in self:
475 diff[fn] = (None, b''), (e2, flags)
475 diff[fn] = (None, b''), (e2, flags)
476
476
477 return diff
477 return diff
478
478
479 def iterentries(self) -> lazymanifestiterentries:
479 def iterentries(self) -> lazymanifestiterentries:
480 return lazymanifestiterentries(self)
480 return lazymanifestiterentries(self)
481
481
482 def iterkeys(self) -> lazymanifestiter:
482 def iterkeys(self) -> lazymanifestiter:
483 return lazymanifestiter(self)
483 return lazymanifestiter(self)
484
484
485 def __iter__(self) -> lazymanifestiter:
485 def __iter__(self) -> lazymanifestiter:
486 return lazymanifestiter(self)
486 return lazymanifestiter(self)
487
487
488 def __len__(self) -> int:
488 def __len__(self) -> int:
489 return len(self.positions)
489 return len(self.positions)
490
490
491 def filtercopy(self, filterfn: Callable[[bytes], bool]) -> '_LazyManifest':
491 def filtercopy(self, filterfn: Callable[[bytes], bool]) -> '_LazyManifest':
492 # XXX should be optimized
492 # XXX should be optimized
493 c = _lazymanifest(self._nodelen, b'')
493 c = _lazymanifest(self._nodelen, b'')
494 for f, n, fl in self.iterentries():
494 for f, n, fl in self.iterentries():
495 if filterfn(f):
495 if filterfn(f):
496 c[f] = n, fl
496 c[f] = n, fl
497 return c
497 return c
498
498
499
499
500 try:
500 try:
501 _lazymanifest = parsers.lazymanifest
501 _lazymanifest = parsers.lazymanifest
502 except AttributeError:
502 except AttributeError:
503 _lazymanifest = _LazyManifest
503 _lazymanifest = _LazyManifest
504
504
505
505
506 class ManifestDict:
506 class ManifestDict:
507 def __init__(self, nodelen: int, data: ByteString = b''):
507 def __init__(self, nodelen: int, data: ByteString = b''):
508 self._nodelen = nodelen
508 self._nodelen = nodelen
509 self._lm = _lazymanifest(nodelen, data)
509 self._lm = _lazymanifest(nodelen, data)
510
510
511 def __getitem__(self, key: bytes) -> bytes:
511 def __getitem__(self, key: bytes) -> bytes:
512 return self._lm[key][0]
512 return self._lm[key][0]
513
513
514 def find(self, key: bytes) -> Tuple[bytes, bytes]:
514 def find(self, key: bytes) -> Tuple[bytes, bytes]:
515 return self._lm[key]
515 return self._lm[key]
516
516
517 def __len__(self) -> int:
517 def __len__(self) -> int:
518 return len(self._lm)
518 return len(self._lm)
519
519
520 def __nonzero__(self) -> bool:
520 def __nonzero__(self) -> bool:
521 # nonzero is covered by the __len__ function, but implementing it here
521 # nonzero is covered by the __len__ function, but implementing it here
522 # makes it easier for extensions to override.
522 # makes it easier for extensions to override.
523 return len(self._lm) != 0
523 return len(self._lm) != 0
524
524
525 __bool__ = __nonzero__
525 __bool__ = __nonzero__
526
526
527 def set(self, key: bytes, node: bytes, flags: bytes) -> None:
527 def set(self, key: bytes, node: bytes, flags: bytes) -> None:
528 self._lm[key] = node, flags
528 self._lm[key] = node, flags
529
529
530 def __setitem__(self, key: bytes, node: bytes) -> None:
530 def __setitem__(self, key: bytes, node: bytes) -> None:
531 self._lm[key] = node, self.flags(key)
531 self._lm[key] = node, self.flags(key)
532
532
533 def __contains__(self, key: bytes) -> bool:
533 def __contains__(self, key: bytes) -> bool:
534 if key is None:
534 if key is None:
535 return False
535 return False
536 return key in self._lm
536 return key in self._lm
537
537
538 def __delitem__(self, key: bytes) -> None:
538 def __delitem__(self, key: bytes) -> None:
539 del self._lm[key]
539 del self._lm[key]
540
540
541 def __iter__(self) -> Iterator[bytes]:
541 def __iter__(self) -> Iterator[bytes]:
542 return self._lm.__iter__()
542 return self._lm.__iter__()
543
543
544 def iterkeys(self) -> Iterator[bytes]:
544 def iterkeys(self) -> Iterator[bytes]:
545 return self._lm.iterkeys()
545 return self._lm.iterkeys()
546
546
547 def keys(self) -> List[bytes]:
547 def keys(self) -> List[bytes]:
548 return list(self.iterkeys())
548 return list(self.iterkeys())
549
549
550 def filesnotin(self, m2, match=None) -> Set[bytes]:
550 def filesnotin(self, m2, match=None) -> Set[bytes]:
551 '''Set of files in this manifest that are not in the other'''
551 '''Set of files in this manifest that are not in the other'''
552 if match is not None:
552 if match is not None:
553 match = matchmod.badmatch(match, lambda path, msg: None)
553 match = matchmod.badmatch(match, lambda path, msg: None)
554 sm2 = set(m2.walk(match))
554 sm2 = set(m2.walk(match))
555 return {f for f in self.walk(match) if f not in sm2}
555 return {f for f in self.walk(match) if f not in sm2}
556 return {f for f in self if f not in m2}
556 return {f for f in self if f not in m2}
557
557
558 @propertycache
558 @propertycache
559 def _dirs(self) -> pathutil.dirs:
559 def _dirs(self) -> pathutil.dirs:
560 return pathutil.dirs(self)
560 return pathutil.dirs(self)
561
561
562 def dirs(self) -> pathutil.dirs:
562 def dirs(self) -> pathutil.dirs:
563 return self._dirs
563 return self._dirs
564
564
565 def hasdir(self, dir: bytes) -> bool:
565 def hasdir(self, dir: bytes) -> bool:
566 return dir in self._dirs
566 return dir in self._dirs
567
567
568 def _filesfastpath(self, match: matchmod.basematcher) -> bool:
568 def _filesfastpath(self, match: matchmod.basematcher) -> bool:
569 """Checks whether we can correctly and quickly iterate over matcher
569 """Checks whether we can correctly and quickly iterate over matcher
570 files instead of over manifest files."""
570 files instead of over manifest files."""
571 files = match.files()
571 files = match.files()
572 return len(files) < 100 and (
572 return len(files) < 100 and (
573 match.isexact()
573 match.isexact()
574 or (match.prefix() and all(fn in self for fn in files))
574 or (match.prefix() and all(fn in self for fn in files))
575 )
575 )
576
576
577 def walk(self, match: matchmod.basematcher) -> Iterator[bytes]:
577 def walk(self, match: matchmod.basematcher) -> Iterator[bytes]:
578 """Generates matching file names.
578 """Generates matching file names.
579
579
580 Equivalent to manifest.matches(match).iterkeys(), but without creating
580 Equivalent to manifest.matches(match).iterkeys(), but without creating
581 an entirely new manifest.
581 an entirely new manifest.
582
582
583 It also reports nonexistent files by marking them bad with match.bad().
583 It also reports nonexistent files by marking them bad with match.bad().
584 """
584 """
585 if match.always():
585 if match.always():
586 for f in iter(self):
586 for f in iter(self):
587 yield f
587 yield f
588 return
588 return
589
589
590 fset = set(match.files())
590 fset = set(match.files())
591
591
592 # avoid the entire walk if we're only looking for specific files
592 # avoid the entire walk if we're only looking for specific files
593 if self._filesfastpath(match):
593 if self._filesfastpath(match):
594 for fn in sorted(fset):
594 for fn in sorted(fset):
595 if fn in self:
595 if fn in self:
596 yield fn
596 yield fn
597 return
597 return
598
598
599 for fn in self:
599 for fn in self:
600 if fn in fset:
600 if fn in fset:
601 # specified pattern is the exact name
601 # specified pattern is the exact name
602 fset.remove(fn)
602 fset.remove(fn)
603 if match(fn):
603 if match(fn):
604 yield fn
604 yield fn
605
605
606 # for dirstate.walk, files=[''] means "walk the whole tree".
606 # for dirstate.walk, files=[''] means "walk the whole tree".
607 # follow that here, too
607 # follow that here, too
608 fset.discard(b'')
608 fset.discard(b'')
609
609
610 for fn in sorted(fset):
610 for fn in sorted(fset):
611 if not self.hasdir(fn):
611 if not self.hasdir(fn):
612 match.bad(fn, None)
612 match.bad(fn, None)
613
613
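The fast path tested above is a size/shape heuristic: with fewer than 100 requested files and an exact (or fully present prefix) matcher, iterating `match.files()` is cheaper than walking the whole manifest. Restated as a standalone predicate (hypothetical helper, same logic):

    def filesfastpath(files, isexact, isprefix, present):
        return len(files) < 100 and (
            isexact or (isprefix and all(f in present for f in files))
        )

    assert filesfastpath([b"a"], True, False, {b"a"})
    assert not filesfastpath([b"f%d" % i for i in range(200)], True, False, set())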
614 def _matches(self, match: matchmod.basematcher) -> 'ManifestDict':
614 def _matches(self, match: matchmod.basematcher) -> 'ManifestDict':
615 '''generate a new manifest filtered by the match argument'''
615 '''generate a new manifest filtered by the match argument'''
616 if match.always():
616 if match.always():
617 return self.copy()
617 return self.copy()
618
618
619 if self._filesfastpath(match):
619 if self._filesfastpath(match):
620 m = manifestdict(self._nodelen)
620 m = manifestdict(self._nodelen)
621 lm = self._lm
621 lm = self._lm
622 for fn in match.files():
622 for fn in match.files():
623 if fn in lm:
623 if fn in lm:
624 m._lm[fn] = lm[fn]
624 m._lm[fn] = lm[fn]
625 return m
625 return m
626
626
627 m = manifestdict(self._nodelen)
627 m = manifestdict(self._nodelen)
628 m._lm = self._lm.filtercopy(match)
628 m._lm = self._lm.filtercopy(match)
629 return m
629 return m
630
630
631 def diff(
631 def diff(
632 self,
632 self,
633 m2: 'ManifestDict',
633 m2: 'ManifestDict',
634 match: Optional[matchmod.basematcher] = None,
634 match: Optional[matchmod.basematcher] = None,
635 clean: bool = False,
635 clean: bool = False,
636 ) -> Dict[
636 ) -> Dict[
637 bytes,
637 bytes,
638 Optional[
638 Optional[
639 Tuple[Tuple[Optional[bytes], bytes], Tuple[Optional[bytes], bytes]]
639 Tuple[Tuple[Optional[bytes], bytes], Tuple[Optional[bytes], bytes]]
640 ],
640 ],
641 ]:
641 ]:
642 """Finds changes between the current manifest and m2.
642 """Finds changes between the current manifest and m2.
643
643
644 Args:
644 Args:
645 m2: the manifest to which this manifest should be compared.
645 m2: the manifest to which this manifest should be compared.
646 clean: if true, include files unchanged between these manifests
646 clean: if true, include files unchanged between these manifests
647 with a None value in the returned dictionary.
647 with a None value in the returned dictionary.
648
648
649 The result is returned as a dict with filename as key and
649 The result is returned as a dict with filename as key and
650 values of the form ((n1,fl1),(n2,fl2)), where n1/n2 is the
650 values of the form ((n1,fl1),(n2,fl2)), where n1/n2 is the
651 nodeid in the current/other manifest and fl1/fl2 is the flag
651 nodeid in the current/other manifest and fl1/fl2 is the flag
652 in the current/other manifest. Where the file does not exist,
652 in the current/other manifest. Where the file does not exist,
653 the nodeid will be None and the flags will be the empty
653 the nodeid will be None and the flags will be the empty
654 string.
654 string.
655 """
655 """
656 if match:
656 if match:
657 m1 = self._matches(match)
657 m1 = self._matches(match)
658 m2 = m2._matches(match)
658 m2 = m2._matches(match)
659 return m1.diff(m2, clean=clean)
659 return m1.diff(m2, clean=clean)
660 return self._lm.diff(m2._lm, clean)
660 return self._lm.diff(m2._lm, clean)
661
661
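The docstring's return shape, shown with literal (made-up) nodes so the tuple nesting is easy to see; `None` on one side marks a missing file, and a whole-entry `None` marks a clean file when `clean=True`:

    n1, n2 = b"\x11" * 20, b"\x22" * 20
    d = {
        b"changed.txt": ((n1, b""), (n2, b"")),
        b"removed.txt": ((n1, b""), (None, b"")),
        b"added.txt": ((None, b""), (n2, b"x")),
        b"clean.txt": None,  # only emitted when clean=True
    }
    changed = {f for f, v in d.items() if v is not None}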
662 def setflag(self, key: bytes, flag: bytes) -> None:
662 def setflag(self, key: bytes, flag: bytes) -> None:
663 if flag not in _manifestflags:
663 if flag not in _manifestflags:
664 raise TypeError(b"Invalid manifest flag set.")
664 raise TypeError(b"Invalid manifest flag set.")
665 self._lm[key] = self[key], flag
665 self._lm[key] = self[key], flag
666
666
667 def get(self, key: bytes, default=None) -> Optional[bytes]:
667 def get(self, key: bytes, default=None) -> Optional[bytes]:
668 try:
668 try:
669 return self._lm[key][0]
669 return self._lm[key][0]
670 except KeyError:
670 except KeyError:
671 return default
671 return default
672
672
673 def flags(self, key: bytes) -> bytes:
673 def flags(self, key: bytes) -> bytes:
674 try:
674 try:
675 return self._lm[key][1]
675 return self._lm[key][1]
676 except KeyError:
676 except KeyError:
677 return b''
677 return b''
678
678
679 def copy(self) -> 'ManifestDict':
679 def copy(self) -> 'ManifestDict':
680 c = manifestdict(self._nodelen)
680 c = manifestdict(self._nodelen)
681 c._lm = self._lm.copy()
681 c._lm = self._lm.copy()
682 return c
682 return c
683
683
684 def items(self) -> Iterator[Tuple[bytes, bytes]]:
684 def items(self) -> Iterator[Tuple[bytes, bytes]]:
685 return (x[:2] for x in self._lm.iterentries())
685 return (x[:2] for x in self._lm.iterentries())
686
686
687 def iteritems(self) -> Iterator[Tuple[bytes, bytes]]:
687 def iteritems(self) -> Iterator[Tuple[bytes, bytes]]:
688 return (x[:2] for x in self._lm.iterentries())
688 return (x[:2] for x in self._lm.iterentries())
689
689
690 def iterentries(self) -> Iterator[Tuple[bytes, bytes, bytes]]:
690 def iterentries(self) -> Iterator[Tuple[bytes, bytes, bytes]]:
691 return self._lm.iterentries()
691 return self._lm.iterentries()
692
692
693 def text(self) -> ByteString:
693 def text(self) -> ByteString:
694 # most likely uses native version
694 # most likely uses native version
695 return self._lm.text()
695 return self._lm.text()
696
696
697 def fastdelta(
697 def fastdelta(
698 self, base: ByteString, changes: Iterable[Tuple[bytes, bool]]
698 self, base: ByteString, changes: Iterable[Tuple[bytes, bool]]
699 ) -> Tuple[ByteString, ByteString]:
699 ) -> Tuple[ByteString, ByteString]:
700 """Given a base manifest text as a bytearray and a list of changes
700 """Given a base manifest text as a bytearray and a list of changes
701 relative to that text, compute a delta that can be used by revlog.
701 relative to that text, compute a delta that can be used by revlog.
702 """
702 """
703 delta = []
703 delta = []
704 dstart = None
704 dstart = None
705 dend = None
705 dend = None
706 dline = [b""]
706 dline = [b""]
707 start = 0
707 start = 0
708 # zero copy representation of base as a buffer
708 # zero copy representation of base as a buffer
709 addbuf = util.buffer(base)
709 addbuf = util.buffer(base)
710
710
711 changes = list(changes)
711 changes = list(changes)
712 if len(changes) < FASTDELTA_TEXTDIFF_THRESHOLD:
712 if len(changes) < FASTDELTA_TEXTDIFF_THRESHOLD:
713 # start with a readonly loop that finds the offset of
713 # start with a readonly loop that finds the offset of
714 # each line and creates the deltas
714 # each line and creates the deltas
715 for f, todelete in changes:
715 for f, todelete in changes:
716 # bs will either be the index of the item or the insert point
716 # bs will either be the index of the item or the insert point
717 start, end = _msearch(addbuf, f, start)
717 start, end = _msearch(addbuf, f, start)
718 if not todelete:
718 if not todelete:
719 h, fl = self._lm[f]
719 h, fl = self._lm[f]
720 l = b"%s\0%s%s\n" % (f, hex(h), fl)
720 l = b"%s\0%s%s\n" % (f, hex(h), fl)
721 else:
721 else:
722 if start == end:
722 if start == end:
723 # item we want to delete was not found, error out
723 # item we want to delete was not found, error out
724 raise AssertionError(
724 raise AssertionError(
725 _(b"failed to remove %s from manifest") % f
725 _(b"failed to remove %s from manifest") % f
726 )
726 )
727 l = b""
727 l = b""
728 if dstart is not None and dstart <= start and dend >= start:
728 if dstart is not None and dstart <= start and dend >= start:
729 if dend < end:
729 if dend < end:
730 dend = end
730 dend = end
731 if l:
731 if l:
732 dline.append(l)
732 dline.append(l)
733 else:
733 else:
734 if dstart is not None:
734 if dstart is not None:
735 delta.append((dstart, dend, b"".join(dline)))
735 delta.append((dstart, dend, b"".join(dline)))
736 dstart = start
736 dstart = start
737 dend = end
737 dend = end
738 dline = [l]
738 dline = [l]
739
739
740 if dstart is not None:
740 if dstart is not None:
741 delta.append((dstart, dend, b"".join(dline)))
741 delta.append((dstart, dend, b"".join(dline)))
742 # apply the delta to the base, and get a delta for addrevision
742 # apply the delta to the base, and get a delta for addrevision
743 deltatext, arraytext = _addlistdelta(base, delta)
743 deltatext, arraytext = _addlistdelta(base, delta)
744 else:
744 else:
745 # For large changes, it's much cheaper to just build the text and
745 # For large changes, it's much cheaper to just build the text and
746 # diff it.
746 # diff it.
747 arraytext = bytearray(self.text())
747 arraytext = bytearray(self.text())
748 deltatext = mdiff.textdiff(
748 deltatext = mdiff.textdiff(
749 util.buffer(base), util.buffer(arraytext)
749 util.buffer(base), util.buffer(arraytext)
750 )
750 )
751
751
752 return arraytext, deltatext
752 return arraytext, deltatext
753
753
754
754
755 manifestdict = interfaceutil.implementer(repository.imanifestdict)(ManifestDict)
755 manifestdict = interfaceutil.implementer(repository.imanifestdict)(ManifestDict)
756
756
757 if typing.TYPE_CHECKING:
757 if typing.TYPE_CHECKING:
758 manifestdict = ManifestDict
758 manifestdict = ManifestDict
759
759
760
760
761 def _msearch(
761 def _msearch(
762 m: ByteString, s: bytes, lo: int = 0, hi: Optional[int] = None
762 m: ByteString, s: bytes, lo: int = 0, hi: Optional[int] = None
763 ) -> Tuple[int, int]:
763 ) -> Tuple[int, int]:
764 """return a tuple (start, end) that says where to find s within m.
764 """return a tuple (start, end) that says where to find s within m.
765
765
766 If the string is found m[start:end] are the line containing
766 If the string is found m[start:end] are the line containing
767 that string. If start == end the string was not found and
767 that string. If start == end the string was not found and
768 they indicate the proper sorted insertion point.
768 they indicate the proper sorted insertion point.
769 """
769 """
770
770
771 def advance(i: int, c: bytes):
771 def advance(i: int, c: bytes):
772 while i < lenm and m[i : i + 1] != c:
772 while i < lenm and m[i : i + 1] != c:
773 i += 1
773 i += 1
774 return i
774 return i
775
775
776 if not s:
776 if not s:
777 return (lo, lo)
777 return (lo, lo)
778 lenm = len(m)
778 lenm = len(m)
779 if not hi:
779 if not hi:
780 hi = lenm
780 hi = lenm
781 while lo < hi:
781 while lo < hi:
782 mid = (lo + hi) // 2
782 mid = (lo + hi) // 2
783 start = mid
783 start = mid
784 while start > 0 and m[start - 1 : start] != b'\n':
784 while start > 0 and m[start - 1 : start] != b'\n':
785 start -= 1
785 start -= 1
786 end = advance(start, b'\0')
786 end = advance(start, b'\0')
787 if bytes(m[start:end]) < s:
787 if bytes(m[start:end]) < s:
788 # we know that after the null there are 40 bytes of sha1
788 # we know that after the null there are 40 bytes of sha1
789 # this translates to the bisect lo = mid + 1
789 # this translates to the bisect lo = mid + 1
790 lo = advance(end + 40, b'\n') + 1
790 lo = advance(end + 40, b'\n') + 1
791 else:
791 else:
792 # this translates to the bisect hi = mid
792 # this translates to the bisect hi = mid
793 hi = start
793 hi = start
794 end = advance(lo, b'\0')
794 end = advance(lo, b'\0')
795 found = m[lo:end]
795 found = m[lo:end]
796 if s == found:
796 if s == found:
797 # we know that after the null there are 40 bytes of sha1
797 # we know that after the null there are 40 bytes of sha1
798 end = advance(end + 40, b'\n')
798 end = advance(end + 40, b'\n')
799 return (lo, end + 1)
799 return (lo, end + 1)
800 else:
800 else:
801 return (lo, lo)
801 return (lo, lo)
802
802
803
803
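A small worked example of the `_msearch` contract, assuming sha1 (40 hex digit) lines as the comments above do; the values below follow from the entry layout:

    def entry(name, node):
        return name + b"\x00" + node * 40 + b"\n"

    m = entry(b"bar", b"a") + entry(b"foo", b"b")  # sorted by name

    # Hit: _msearch(m, b"foo") spans the whole line -> (45, 90), because
    # len(entry(b"bar", b"a")) == 3 + 1 + 40 + 1 == 45.
    # Miss: _msearch(m, b"baz") -> (45, 45), the sorted insertion point.
    assert len(entry(b"bar", b"a")) == 45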
804 def _checkforbidden(l: Iterable[bytes]) -> None:
804 def _checkforbidden(l: Iterable[bytes]) -> None:
805 """Check filenames for illegal characters."""
805 """Check filenames for illegal characters."""
806 for f in l:
806 for f in l:
807 if b'\n' in f or b'\r' in f:
807 if b'\n' in f or b'\r' in f:
808 raise error.StorageError(
808 raise error.StorageError(
809 _(b"'\\n' and '\\r' disallowed in filenames: %r")
809 _(b"'\\n' and '\\r' disallowed in filenames: %r")
810 % pycompat.bytestr(f)
810 % pycompat.bytestr(f)
811 )
811 )
812
812
813
813
814 # apply the changes collected during the bisect loop to our addlist
814 # apply the changes collected during the bisect loop to our addlist
815 # return a delta suitable for addrevision
815 # return a delta suitable for addrevision
816 def _addlistdelta(
816 def _addlistdelta(
817 addlist: ByteString,
817 addlist: ByteString,
818 x: Iterable[Tuple[int, int, bytes]],
818 x: Iterable[Tuple[int, int, bytes]],
819 ) -> Tuple[bytes, ByteString]:
819 ) -> Tuple[bytes, ByteString]:
820 # for large addlist arrays, building a new array is cheaper
820 # for large addlist arrays, building a new array is cheaper
821 # than repeatedly modifying the existing one
821 # than repeatedly modifying the existing one
822 currentposition = 0
822 currentposition = 0
823 newaddlist = bytearray()
823 newaddlist = bytearray()
824
824
825 for start, end, content in x:
825 for start, end, content in x:
826 newaddlist += addlist[currentposition:start]
826 newaddlist += addlist[currentposition:start]
827 if content:
827 if content:
828 newaddlist += bytearray(content)
828 newaddlist += bytearray(content)
829
829
830 currentposition = end
830 currentposition = end
831
831
832 newaddlist += addlist[currentposition:]
832 newaddlist += addlist[currentposition:]
833
833
834 deltatext = b"".join(
834 deltatext = b"".join(
835 struct.pack(b">lll", start, end, len(content)) + content
835 struct.pack(b">lll", start, end, len(content)) + content
836 for start, end, content in x
836 for start, end, content in x
837 )
837 )
838 return deltatext, newaddlist
838 return deltatext, newaddlist
839
839
840
840
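`deltatext` is a concatenation of `>lll` records (start, end, payload length), each followed by the payload, to be spliced against the base text. A minimal sketch of producing and replaying one record (the replay loop is an illustrative consumer; the real patch application lives in mdiff/mpatch):

    import struct

    base = bytearray(b"aaa.txt\x00" + b"0" * 40 + b"\n")
    patch = [(0, len(base), b"bbb.txt\x00" + b"1" * 40 + b"\n")]
    deltatext = b"".join(
        struct.pack(b">lll", s, e, len(c)) + c for s, e, c in patch
    )

    # Replay: copy unchanged spans, splice in each payload.
    out, pos, off = bytearray(), 0, 0
    while off < len(deltatext):
        s, e, l = struct.unpack(b">lll", deltatext[off : off + 12])
        off += 12
        out += base[pos:s] + deltatext[off : off + l]
        pos, off = e, off + l
    out += base[pos:]
    assert bytes(out) == b"bbb.txt\x00" + b"1" * 40 + b"\n"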
841 def _splittopdir(f: bytes) -> Tuple[bytes, bytes]:
841 def _splittopdir(f: bytes) -> Tuple[bytes, bytes]:
842 if b'/' in f:
842 if b'/' in f:
843 dir, subpath = f.split(b'/', 1)
843 dir, subpath = f.split(b'/', 1)
844 return dir + b'/', subpath
844 return dir + b'/', subpath
845 else:
845 else:
846 return b'', f
846 return b'', f
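
# For example (illustrative): _splittopdir(b'a/b/c') == (b'a/', b'b/c'), and
# _splittopdir(b'top.txt') == (b'', b'top.txt').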


_noop = lambda s: None


class TreeManifest:
    _dir: bytes
    _dirs: Dict[bytes, 'TreeManifest']
    _dirty: bool
    _files: Dict[bytes, bytes]
    _flags: Dict[bytes, bytes]

    def __init__(self, nodeconstants, dir: bytes = b'', text: bytes = b''):
        self._dir = dir
        self.nodeconstants = nodeconstants
        self._node = self.nodeconstants.nullid
        self._nodelen = self.nodeconstants.nodelen
        self._loadfunc = _noop
        self._copyfunc = _noop
        self._dirty = False
        self._dirs = {}
        self._lazydirs: Dict[
            bytes,
            Tuple[bytes, Callable[[bytes, bytes], 'TreeManifest'], bool],
        ] = {}
        # Using _lazymanifest here is a little slower than plain old dicts
        self._files = {}
        self._flags = {}
        if text:

            def readsubtree(subdir, subm):
                raise AssertionError(
                    b'treemanifest constructor only accepts flat manifests'
                )

            self.parse(text, readsubtree)
            self._dirty = True  # Mark flat manifest dirty after parsing

    def _subpath(self, path: bytes) -> bytes:
        return self._dir + path

    def _loadalllazy(self) -> None:
        selfdirs = self._dirs
        subpath = self._subpath
        for d, (node, readsubtree, docopy) in self._lazydirs.items():
            if docopy:
                selfdirs[d] = readsubtree(subpath(d), node).copy()
            else:
                selfdirs[d] = readsubtree(subpath(d), node)
        self._lazydirs.clear()

    def _loadlazy(self, d: bytes) -> None:
        v = self._lazydirs.get(d)
        if v is not None:
            node, readsubtree, docopy = v
            if docopy:
                self._dirs[d] = readsubtree(self._subpath(d), node).copy()
            else:
                self._dirs[d] = readsubtree(self._subpath(d), node)
            del self._lazydirs[d]

    def _loadchildrensetlazy(
        self, visit: Union[Set[bytes], bytes]
    ) -> Optional[Set[bytes]]:
        if not visit:
            return None
        if visit == b'all' or visit == b'this':
            self._loadalllazy()
            return None

        visit = cast(Set[bytes], visit)

        loadlazy = self._loadlazy
        for k in visit:
            loadlazy(k + b'/')
        return visit

    def _loaddifflazy(self, t1: 'TreeManifest', t2: 'TreeManifest'):
        """load items in t1 and t2 if they're needed for diffing.

        The criteria currently are:
        - if it's not present in _lazydirs in either t1 or t2, load it in the
          other (it may already be loaded or it may not exist, doesn't matter)
        - if it's present in _lazydirs in both, compare the nodeid; if it
          differs, load it in both
        """
        toloadlazy = []
        for d, v1 in t1._lazydirs.items():
            v2 = t2._lazydirs.get(d)
            if v2 is None or v2[0] != v1[0]:
                toloadlazy.append(d)
        for d, v1 in t2._lazydirs.items():
            if d not in t1._lazydirs:
                toloadlazy.append(d)

        for d in toloadlazy:
            t1._loadlazy(d)
            t2._loadlazy(d)

    def __len__(self) -> int:
        self._load()
        size = len(self._files)
        self._loadalllazy()
        for m in self._dirs.values():
            size += m.__len__()
        return size

    def __nonzero__(self) -> bool:
        # Faster than "__len__() != 0" since it avoids loading sub-manifests
        return not self._isempty()

    __bool__ = __nonzero__

    def _isempty(self) -> bool:
        self._load()  # for consistency; already loaded by all callers
        # See if we can skip loading everything.
        if self._files or (
            self._dirs and any(not m._isempty() for m in self._dirs.values())
        ):
            return False
        self._loadalllazy()
        return not self._dirs or all(m._isempty() for m in self._dirs.values())

    @encoding.strmethod
    def __repr__(self) -> bytes:
        return (
            b'<treemanifest dir=%s, node=%s, loaded=%r, dirty=%r at 0x%x>'
            % (
                self._dir,
                hex(self._node),
                bool(self._loadfunc is _noop),
                self._dirty,
                id(self),
            )
        )

    def dir(self) -> bytes:
        """The directory that this tree manifest represents, including a
        trailing '/'. Empty string for the repo root directory."""
        return self._dir

    def node(self) -> bytes:
        """The node of this instance. nullid for unsaved instances. Should
        be updated when the instance is read or written from a revlog.
        """
        assert not self._dirty
        return self._node

    def setnode(self, node: bytes) -> None:
        self._node = node
        self._dirty = False

    def iterentries(
        self,
    ) -> Iterator[Tuple[bytes, Union[bytes, 'TreeManifest'], bytes]]:
        self._load()
        self._loadalllazy()
        for p, n in sorted(
            itertools.chain(self._dirs.items(), self._files.items())
        ):
            if p in self._files:
                yield self._subpath(p), n, self._flags.get(p, b'')
            else:
                for x in n.iterentries():
                    yield x

    def items(self) -> Iterator[Tuple[bytes, Union[bytes, 'TreeManifest']]]:
        self._load()
        self._loadalllazy()
        for p, n in sorted(
            itertools.chain(self._dirs.items(), self._files.items())
        ):
            if p in self._files:
                yield self._subpath(p), n
            else:
                for f, sn in n.items():
                    yield f, sn

    iteritems = items

    def iterkeys(self) -> Iterator[bytes]:
        self._load()
        self._loadalllazy()
        for p in sorted(itertools.chain(self._dirs, self._files)):
            if p in self._files:
                yield self._subpath(p)
            else:
                for f in self._dirs[p]:
                    yield f

    def keys(self) -> List[bytes]:
        return list(self.iterkeys())

    def __iter__(self) -> Iterator[bytes]:
        return self.iterkeys()

    def __contains__(self, f: bytes) -> bool:
        if f is None:
            return False
        self._load()
        dir, subpath = _splittopdir(f)
        if dir:
            self._loadlazy(dir)

            if dir not in self._dirs:
                return False

            return self._dirs[dir].__contains__(subpath)
        else:
            return f in self._files

    def get(self, f: bytes, default: Optional[bytes] = None) -> Optional[bytes]:
        self._load()
        dir, subpath = _splittopdir(f)
        if dir:
            self._loadlazy(dir)

            if dir not in self._dirs:
                return default
            return self._dirs[dir].get(subpath, default)
        else:
            return self._files.get(f, default)

    def __getitem__(self, f: bytes) -> bytes:
        self._load()
        dir, subpath = _splittopdir(f)
        if dir:
            self._loadlazy(dir)

            return self._dirs[dir].__getitem__(subpath)
        else:
            return self._files[f]

    def flags(self, f: bytes) -> bytes:
        self._load()
        dir, subpath = _splittopdir(f)
        if dir:
            self._loadlazy(dir)

            if dir not in self._dirs:
                return b''
            return self._dirs[dir].flags(subpath)
        else:
            if f in self._lazydirs or f in self._dirs:
                return b''
            return self._flags.get(f, b'')

    def find(self, f: bytes) -> Tuple[bytes, bytes]:
        self._load()
        dir, subpath = _splittopdir(f)
        if dir:
            self._loadlazy(dir)

            return self._dirs[dir].find(subpath)
        else:
            return self._files[f], self._flags.get(f, b'')

    def __delitem__(self, f: bytes) -> None:
        self._load()
        dir, subpath = _splittopdir(f)
        if dir:
            self._loadlazy(dir)

            self._dirs[dir].__delitem__(subpath)
            # If the directory is now empty, remove it
            if self._dirs[dir]._isempty():
                del self._dirs[dir]
        else:
            del self._files[f]
            if f in self._flags:
                del self._flags[f]
        self._dirty = True

    def set(self, f: bytes, node: bytes, flags: bytes) -> None:
        """Set both the node and the flags for path f."""
        assert node is not None
        if flags not in _manifestflags:
            raise TypeError(b"Invalid manifest flag set.")
        self._load()
        dir, subpath = _splittopdir(f)
        if dir:
            self._loadlazy(dir)
            if dir not in self._dirs:
                self._dirs[dir] = treemanifest(
                    self.nodeconstants, self._subpath(dir)
                )
            self._dirs[dir].set(subpath, node, flags)
        else:
            assert len(node) in (20, 32)
            self._files[f] = node
            self._flags[f] = flags
        self._dirty = True

    def __setitem__(self, f: bytes, n: bytes) -> None:
        assert n is not None
        self._load()
        dir, subpath = _splittopdir(f)
        if dir:
            self._loadlazy(dir)
            if dir not in self._dirs:
                self._dirs[dir] = treemanifest(
                    self.nodeconstants, self._subpath(dir)
                )
            self._dirs[dir].__setitem__(subpath, n)
        else:
            # manifest nodes are either 20 bytes or 32 bytes,
            # depending on the hash in use. Assert this as historically
            # sometimes extra bytes were added.
            assert len(n) in (20, 32)
            self._files[f] = n
        self._dirty = True

    def _load(self) -> None:
        if self._loadfunc is not _noop:
            lf, self._loadfunc = self._loadfunc, _noop
            lf(self)
        elif self._copyfunc is not _noop:
            cf, self._copyfunc = self._copyfunc, _noop
            cf(self)

    def setflag(self, f: bytes, flags: bytes) -> None:
        """Set the flags (symlink, executable) for path f."""
        if flags not in _manifestflags:
            raise TypeError(b"Invalid manifest flag set.")
        self._load()
        dir, subpath = _splittopdir(f)
        if dir:
            self._loadlazy(dir)
            if dir not in self._dirs:
                self._dirs[dir] = treemanifest(
                    self.nodeconstants, self._subpath(dir)
                )
            self._dirs[dir].setflag(subpath, flags)
        else:
            self._flags[f] = flags
        self._dirty = True

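    # Illustrative usage (not part of the original class): with a
    # nodeconstants object such as mercurial.node.sha1nodeconstants and a
    # fake 20-byte node, the mutators above compose like this:
    #
    #   m = treemanifest(sha1nodeconstants)
    #   m[b'src/app.py'] = b'\x11' * 20  # creates the 'src/' submanifest
    #   m.setflag(b'src/app.py', b'x')   # mark as executable
    #   assert m.flags(b'src/app.py') == b'x'
    #   assert b'src/app.py' in m
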
    def copy(self) -> 'TreeManifest':
        copy = treemanifest(self.nodeconstants, self._dir)
        copy._node = self._node
        copy._dirty = self._dirty
        if self._copyfunc is _noop:

            def _copyfunc(s):
                self._load()
                s._lazydirs = {
                    d: (n, r, True) for d, (n, r, c) in self._lazydirs.items()
                }
                sdirs = s._dirs
                for d, v in self._dirs.items():
                    sdirs[d] = v.copy()
                s._files = dict.copy(self._files)
                s._flags = dict.copy(self._flags)

            if self._loadfunc is _noop:
                _copyfunc(copy)
            else:
                copy._copyfunc = _copyfunc
        else:
            copy._copyfunc = self._copyfunc
        return copy

    def filesnotin(
        self, m2: 'TreeManifest', match: Optional[matchmod.basematcher] = None
    ) -> Set[bytes]:
        '''Set of files in this manifest that are not in the other'''
        if match and not match.always():
            m1 = self._matches(match)
            m2 = m2._matches(match)
            return m1.filesnotin(m2)

        files = set()

        def _filesnotin(t1, t2):
            if t1._node == t2._node and not t1._dirty and not t2._dirty:
                return
            t1._load()
            t2._load()
            self._loaddifflazy(t1, t2)
            for d, m1 in t1._dirs.items():
                if d in t2._dirs:
                    m2 = t2._dirs[d]
                    _filesnotin(m1, m2)
                else:
                    files.update(m1.iterkeys())

            for fn in t1._files:
                if fn not in t2._files:
                    files.add(t1._subpath(fn))

        _filesnotin(self, m2)
        return files

    @propertycache
    def _alldirs(self) -> pathutil.dirs:
        return pathutil.dirs(self)

    def dirs(self) -> pathutil.dirs:
        return self._alldirs

    def hasdir(self, dir: bytes) -> bool:
        self._load()
        topdir, subdir = _splittopdir(dir)
        if topdir:
            self._loadlazy(topdir)
            if topdir in self._dirs:
                return self._dirs[topdir].hasdir(subdir)
            return False
        dirslash = dir + b'/'
        return dirslash in self._dirs or dirslash in self._lazydirs

    def walk(self, match: matchmod.basematcher) -> Iterator[bytes]:
        """Generates matching file names.

        It also reports nonexistent files by marking them bad with match.bad().
        """
        if match.always():
            for f in iter(self):
                yield f
            return

        fset = set(match.files())

        for fn in self._walk(match):
            if fn in fset:
                # specified pattern is the exact name
                fset.remove(fn)
            yield fn

        # for dirstate.walk, files=[''] means "walk the whole tree".
        # follow that here, too
        fset.discard(b'')

        for fn in sorted(fset):
            if not self.hasdir(fn):
                match.bad(fn, None)

    def _walk(self, match: matchmod.basematcher) -> Iterator[bytes]:
        '''Recursively generates matching file names for walk().'''
        visit = match.visitchildrenset(self._dir[:-1])
        if not visit:
            return

        # yield this dir's files and walk its submanifests
        self._load()
        visit = self._loadchildrensetlazy(visit)
        for p in sorted(list(self._dirs) + list(self._files)):
            if p in self._files:
                fullp = self._subpath(p)
                if match(fullp):
                    yield fullp
            else:
                if not visit or p[:-1] in visit:
                    for f in self._dirs[p]._walk(match):
                        yield f

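    # Descriptive note (interpretation, not from the original source):
    # match.visitchildrenset() returns b'all' (recurse everywhere under this
    # directory), b'this' (only this directory's own entries), a set of
    # child names to descend into, or an empty set meaning there is nothing
    # to visit; _loadchildrensetlazy() above maps the first two cases to
    # None after loading the lazy subdirectories.
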
    def _matches(self, match: matchmod.basematcher) -> 'TreeManifest':
        """recursively generate a new manifest filtered by the match argument."""
        if match.always():
            return self.copy()
        return self._matches_inner(match)

    def _matches_inner(self, match: matchmod.basematcher) -> 'TreeManifest':
        if match.always():
            return self.copy()

        visit = match.visitchildrenset(self._dir[:-1])
        if visit == b'all':
            return self.copy()
        ret = treemanifest(self.nodeconstants, self._dir)
        if not visit:
            return ret

        self._load()
        for fn in self._files:
            # While visitchildrenset *usually* lists only subdirs, this is
            # actually up to the matcher and may have some files in the set().
            # If visit == 'this', we should obviously look at the files in this
            # directory; if visit is a set, and fn is in it, we should inspect
            # fn (but no need to inspect things not in the set).
            if visit != b'this' and fn not in visit:
                continue
            fullp = self._subpath(fn)
            # visitchildrenset isn't perfect, we still need to call the regular
            # matcher code to further filter results.
            if not match(fullp):
                continue
            ret._files[fn] = self._files[fn]
            if fn in self._flags:
                ret._flags[fn] = self._flags[fn]

        visit = self._loadchildrensetlazy(visit)
        for dir, subm in self._dirs.items():
            if visit and dir[:-1] not in visit:
                continue
            m = subm._matches_inner(match)
            if not m._isempty():
                ret._dirs[dir] = m

        if not ret._isempty():
            ret._dirty = True
        return ret

    def fastdelta(
        self, base: ByteString, changes: Iterable[Tuple[bytes, bool]]
    ) -> ByteString:
        raise FastdeltaUnavailable()

    def diff(
        self,
        m2: 'TreeManifest',
        match: Optional[matchmod.basematcher] = None,
        clean: bool = False,
    ) -> Dict[
        bytes,
        Optional[
            Tuple[Tuple[Optional[bytes], bytes], Tuple[Optional[bytes], bytes]]
        ],
    ]:
        """Finds changes between the current manifest and m2.

        Args:
          m2: the manifest to which this manifest should be compared.
          clean: if true, include files unchanged between these manifests
                 with a None value in the returned dictionary.

        The result is returned as a dict with filename as key and
        values of the form ((n1,fl1),(n2,fl2)), where n1/n2 is the
        nodeid in the current/other manifest and fl1/fl2 is the flag
        in the current/other manifest. Where the file does not exist,
        the nodeid will be None and the flags will be the empty
        string.
        """
        if match and not match.always():
            m1 = self._matches(match)
            m2 = m2._matches(match)
            return m1.diff(m2, clean=clean)
        result = {}
        emptytree = treemanifest(self.nodeconstants)

        def _iterativediff(t1, t2, stack):
            """compares two tree manifests and appends new tree manifests
            which need to be compared to the stack"""
            if t1._node == t2._node and not t1._dirty and not t2._dirty:
                return
            t1._load()
            t2._load()
            self._loaddifflazy(t1, t2)

            for d, m1 in t1._dirs.items():
                m2 = t2._dirs.get(d, emptytree)
                stack.append((m1, m2))

            for d, m2 in t2._dirs.items():
                if d not in t1._dirs:
                    stack.append((emptytree, m2))

            for fn, n1 in t1._files.items():
                fl1 = t1._flags.get(fn, b'')
                n2 = t2._files.get(fn, None)
                fl2 = t2._flags.get(fn, b'')
                if n1 != n2 or fl1 != fl2:
                    result[t1._subpath(fn)] = ((n1, fl1), (n2, fl2))
                elif clean:
                    result[t1._subpath(fn)] = None

            for fn, n2 in t2._files.items():
                if fn not in t1._files:
                    fl2 = t2._flags.get(fn, b'')
                    result[t2._subpath(fn)] = ((None, b''), (n2, fl2))

        stackls = []
        _iterativediff(self, m2, stackls)
        while stackls:
            t1, t2 = stackls.pop()
            # stackls is populated in the function call
            _iterativediff(t1, t2, stackls)
        return result

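    # Illustrative sketch (not part of the original class): for two
    # hypothetical manifests where only b'a.py' changed and b'new.txt' was
    # added in m2, diff() would return something like:
    #
    #   {
    #       b'a.py': ((old_node, b''), (new_node, b'')),
    #       b'new.txt': ((None, b''), (added_node, b'')),
    #   }
    #
    # i.e. a missing side is reported as a None nodeid with empty flags.
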
    def unmodifiedsince(self, m2: 'TreeManifest') -> bool:
        return not self._dirty and not m2._dirty and self._node == m2._node

    def parse(
        self,
        text: bytes,
        readsubtree: Callable[[bytes, bytes], 'TreeManifest'],
    ) -> None:
        selflazy = self._lazydirs
        for f, n, fl in _parse(self._nodelen, text):
            if fl == b't':
                f = f + b'/'
                # False below means "doesn't need to be copied" and can use the
                # cached value from readsubtree directly.
                selflazy[f] = (n, readsubtree, False)
            elif b'/' in f:
                # This is a flat manifest, so use __setitem__ and setflag rather
                # than assigning directly to _files and _flags, so we can
                # assign a path in a subdirectory, and to mark dirty (compared
                # to nullid).
                self[f] = n
                if fl:
                    self.setflag(f, fl)
            else:
                # Assigning to _files and _flags avoids marking as dirty,
                # and should be a little faster.
                self._files[f] = n
                if fl:
                    self._flags[f] = fl

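    # Illustrative sketch (not part of the original class): the parsed text
    # is the usual manifest encoding, one b"<path>\x00<hex node><flag>\n"
    # line per entry; flag b't' marks a subdirectory entry that is loaded
    # lazily:
    #
    #   b'src\x00' + b'ab' * 20 + b't\n'      # subtree entry -> _lazydirs
    #   b'setup.py\x00' + b'cd' * 20 + b'\n'  # file entry -> _files
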
    def text(self) -> ByteString:
        """Get the full data of this manifest as a bytestring."""
        self._load()
        return _text(self.iterentries())

    def dirtext(self) -> ByteString:
        """Get the full data of this directory as a bytestring. Make sure that
        any submanifests have been written first, so their nodeids are correct.
        """
        self._load()
        flags = self.flags
        lazydirs = [(d[:-1], v[0], b't') for d, v in self._lazydirs.items()]
        dirs = [(d[:-1], self._dirs[d]._node, b't') for d in self._dirs]
        files = [(f, self._files[f], flags(f)) for f in self._files]
        return _text(sorted(dirs + files + lazydirs))

    def read(
        self,
        gettext: Callable[[], ByteString],
        readsubtree: Callable[[bytes, bytes], 'TreeManifest'],
    ) -> None:
        def _load_for_read(s):
            s.parse(gettext(), readsubtree)
            s._dirty = False

        self._loadfunc = _load_for_read

    def writesubtrees(
        self,
        m1: 'TreeManifest',
        m2: 'TreeManifest',
        writesubtree: Callable[
            [
                Callable[['TreeManifest'], None],
                bytes,
                bytes,
                matchmod.basematcher,
            ],
            None,
        ],
        match: matchmod.basematcher,
    ) -> None:
        self._load()  # for consistency; should never have any effect here
        m1._load()
        m2._load()
        emptytree = treemanifest(self.nodeconstants)

        def getnode(m, d):
            ld = m._lazydirs.get(d)
            if ld:
                return ld[0]
            tree = m._dirs.get(d, emptytree)
            assert tree is not None  # helps pytype
            return tree._node

        # let's skip investigating things that `match` says we do not need.
        visit = match.visitchildrenset(self._dir[:-1])
        visit = self._loadchildrensetlazy(visit)
        if visit == b'this' or visit == b'all':
            visit = None
        for d, subm in self._dirs.items():
            if visit and d[:-1] not in visit:
                continue
            subp1 = getnode(m1, d)
            subp2 = getnode(m2, d)
            if subp1 == self.nodeconstants.nullid:
                subp1, subp2 = subp2, subp1
            writesubtree(subm, subp1, subp2, match)

    def walksubtrees(
        self, matcher: Optional[matchmod.basematcher] = None
    ) -> Iterator['TreeManifest']:
        """Returns an iterator of the subtrees of this manifest, including this
        manifest itself.

        If `matcher` is provided, it only returns subtrees that match.
        """
        if matcher and not matcher.visitdir(self._dir[:-1]):
            return
        if not matcher or matcher(self._dir[:-1]):
            yield self

        self._load()
        # OPT: use visitchildrenset to avoid loading everything.
        self._loadalllazy()
        for d, subm in self._dirs.items():
            for subtree in subm.walksubtrees(matcher=matcher):
                yield subtree


treemanifest = interfaceutil.implementer(repository.imanifestdict)(TreeManifest)

if typing.TYPE_CHECKING:
    treemanifest = TreeManifest
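
# Descriptive note (not from the original source): at runtime the name
# `treemanifest` is the interface-decorated class returned by
# interfaceutil.implementer(), while under typing.TYPE_CHECKING it is
# re-bound to the plain TreeManifest class, so static checkers such as
# pytype resolve the real attributes and method signatures.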


class manifestfulltextcache(util.lrucachedict):
    """File-backed LRU cache for the manifest cache

    File consists of entries, up to EOF:

    - 20 bytes node, 4 bytes length, <length> manifest data

    These are written in reverse cache order (oldest to newest).

    """

    _file = b'manifestfulltextcache'

    def __init__(self, max):
        super(manifestfulltextcache, self).__init__(max)
        self._dirty = False
        self._read = False
        self._opener = None

    def read(self):
        if self._read or self._opener is None:
            return

        try:
            with self._opener(self._file) as fp:
                set = super(manifestfulltextcache, self).__setitem__
                # ignore trailing data, this is a cache, corruption is skipped
                while True:
                    # TODO do we need to do work here for sha1 portability?
                    node = fp.read(20)
                    if len(node) < 20:
                        break
                    try:
                        size = struct.unpack(b'>L', fp.read(4))[0]
                    except struct.error:
                        break
                    value = bytearray(fp.read(size))
                    if len(value) != size:
                        break
                    set(node, value)
        except IOError:
            # the file is allowed to be missing
            pass

        self._read = True
        self._dirty = False

    def write(self):
        if not self._dirty or self._opener is None:
            return
        # rotate backwards to the first used node
        try:
            with self._opener(
                self._file, b'w', atomictemp=True, checkambig=True
            ) as fp:
                node = self._head.prev
                while True:
                    if node.key in self._cache:
                        fp.write(node.key)
                        fp.write(struct.pack(b'>L', len(node.value)))
                        fp.write(node.value)
                    if node is self._head:
                        break
                    node = node.prev
        except IOError:
            # We could not write the cache (e.g. a permission error), and
            # the content can be missing.
            #
            # We could try harder and see if we could recreate a wcache
            # directory where we could write to.
            #
            # XXX the error passes silently; having some way to issue an
            # error log (`ui.log`) would be nice.
            pass

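    # Illustrative sketch (not part of the original class): one on-disk
    # record as handled by read()/write() above, for a hypothetical node:
    #
    #   node = b'\x01' * 20                  # 20-byte binary node
    #   data = b'fake manifest text\n'
    #   record = node + struct.pack(b'>L', len(data)) + data
    #
    # read() consumes records sequentially until EOF, treating a short read
    # as the end of usable data.
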
    def __len__(self):
        if not self._read:
            self.read()
        return super(manifestfulltextcache, self).__len__()

    def __contains__(self, k):
        if not self._read:
            self.read()
        return super(manifestfulltextcache, self).__contains__(k)

    def __iter__(self):
        if not self._read:
            self.read()
        return super(manifestfulltextcache, self).__iter__()

    def __getitem__(self, k):
        if not self._read:
            self.read()
        # the cache lru order can change on read
        setdirty = self._cache.get(k) is not self._head
        value = super(manifestfulltextcache, self).__getitem__(k)
        if setdirty:
            self._dirty = True
        return value

    def __setitem__(self, k, v):
        if not self._read:
            self.read()
        super(manifestfulltextcache, self).__setitem__(k, v)
        self._dirty = True

    def __delitem__(self, k):
        if not self._read:
            self.read()
        super(manifestfulltextcache, self).__delitem__(k)
        self._dirty = True

    def get(self, k, default=None):
        if not self._read:
            self.read()
        return super(manifestfulltextcache, self).get(k, default=default)

    def clear(self, clear_persisted_data=False):
        super(manifestfulltextcache, self).clear()
        if clear_persisted_data:
            self._dirty = True
            self.write()
        self._read = False


# an upper bound of what we expect from compression
# (the real-life value seems to be "3")
MAXCOMPRESSION = 3


class FastdeltaUnavailable(Exception):
    """Exception raised when fastdelta isn't usable on a manifest."""


class ManifestRevlog:
    """A revlog that stores manifest texts. This is responsible for caching the
    full-text manifest contents.
    """

    def __init__(
        self,
        nodeconstants,
        opener,
        tree=b'',
        dirlogcache=None,
        treemanifest=False,
    ):
        """Constructs a new manifest revlog

        `indexfile` - used by extensions to have two manifests at once, like
        when transitioning between flat manifests and tree manifests.

        `treemanifest` - used to indicate this is a tree manifest revlog. Opener
        options can also be used to make this a tree manifest revlog. The opener
        option takes precedence, so if it is set to True, we ignore whatever
        value is passed in to the constructor.
        """
        self.nodeconstants = nodeconstants
        # During normal operations, we expect to deal with not more than four
        # revs at a time (such as during commit --amend). When rebasing large
        # stacks of commits, the number can go up, hence the config knob below.
        cachesize = 4
        optiontreemanifest = False
        persistentnodemap = False
        opts = getattr(opener, 'options', None)
        if opts is not None:
            cachesize = opts.get(b'manifestcachesize', cachesize)
            optiontreemanifest = opts.get(b'treemanifest', False)
            persistentnodemap = opts.get(b'persistent-nodemap', False)

        self._treeondisk = optiontreemanifest or treemanifest

        self._fulltextcache = manifestfulltextcache(cachesize)

        if tree:
            assert self._treeondisk, (tree, b'opts is %r' % opts)

        radix = b'00manifest'
        if tree:
            radix = b"meta/" + tree + radix

        self.tree = tree

        # The dirlogcache is kept on the root manifest log
        if tree:
            self._dirlogcache = dirlogcache
        else:
            self._dirlogcache = {b'': self}

        self._revlog = revlog.revlog(
            opener,
            target=(revlog_constants.KIND_MANIFESTLOG, self.tree),
            radix=radix,
            # only root indexfile is cached
            checkambig=not bool(tree),
            mmaplargeindex=True,
            upperboundcomp=MAXCOMPRESSION,
            persistentnodemap=persistentnodemap,
        )

        self.index = self._revlog.index

    def get_revlog(self):
        """return an actual revlog instance if any

        This exists because a lot of code leverages the fact that the
        underlying storage is a revlog for optimization, so giving a simple
        way to access the revlog instance helps such code.
        """
        return self._revlog

    def _setupmanifestcachehooks(self, repo):
        """Persist the manifestfulltextcache on lock release"""
        if not hasattr(repo, '_wlockref'):
            return

        self._fulltextcache._opener = repo.wcachevfs
        if repo._currentlock(repo._wlockref) is None:
            return

        reporef = weakref.ref(repo)
        manifestrevlogref = weakref.ref(self)

        def persistmanifestcache(success):
            # Repo is in an unknown state, do not persist.
            if not success:
                return

            repo = reporef()
            self = manifestrevlogref()
            if repo is None or self is None:
                return
            if repo.manifestlog.getstorage(b'') is not self:
                # there's a different manifest in play now, abort
                return
            self._fulltextcache.write()

        repo._afterlock(persistmanifestcache)

    @property
    def fulltextcache(self):
        return self._fulltextcache

    def clearcaches(self, clear_persisted_data=False):
        self._revlog.clearcaches()
        self._fulltextcache.clear(clear_persisted_data=clear_persisted_data)
        self._dirlogcache = {self.tree: self}

    def dirlog(self, d):
        if d:
            assert self._treeondisk
        if d not in self._dirlogcache:
            mfrevlog = manifestrevlog(
                self.nodeconstants,
                self.opener,
                d,
                self._dirlogcache,
                treemanifest=self._treeondisk,
            )
            self._dirlogcache[d] = mfrevlog
        return self._dirlogcache[d]

    def add(
        self,
        m,
        transaction,
        link,
        p1,
        p2,
        added: Iterable[bytes],
        removed: Iterable[bytes],
        readtree=None,
        match=None,
    ):
        """add some manifest entry into the manifest log

        input:

         m:           the manifest dict we want to store
         transaction: the open transaction
         p1:          manifest-node of p1
         p2:          manifest-node of p2
         added:       files added/changed compared to parent
         removed:     files removed compared to parent

        tree manifest input:

         readtree: a function to read a subtree
         match:    a filematcher for the subpart of the tree manifest
        """
        try:
            if p1 not in self.fulltextcache:
                raise FastdeltaUnavailable()
            # If our first parent is in the manifest cache, we can
            # compute a delta here using properties we know about the
            # manifest up-front, which may save time later for the
            # revlog layer.

            _checkforbidden(added)
            # combine the changed lists into one sorted iterator
            work = heapq.merge(
                [(x, False) for x in sorted(added)],
                [(x, True) for x in sorted(removed)],
            )

            arraytext, deltatext = m.fastdelta(self.fulltextcache[p1], work)
            cachedelta = self._revlog.rev(p1), deltatext
            text = util.buffer(arraytext)
            rev = self._revlog.addrevision(
                text, transaction, link, p1, p2, cachedelta
            )
            n = self._revlog.node(rev)
        except FastdeltaUnavailable:
            # The first parent manifest isn't already loaded or the
            # manifest implementation doesn't support fastdelta, so
            # we'll just encode a fulltext of the manifest and pass
            # that through to the revlog layer, and let it handle the
            # delta process.
            if self._treeondisk:
                assert readtree, b"readtree must be set for treemanifest writes"
                assert match, b"match must be specified for treemanifest writes"
                m1 = readtree(self.tree, p1)
                m2 = readtree(self.tree, p2)
                n = self._addtree(
                    m, transaction, link, m1, m2, readtree, match=match
1871 )
1877 )
1872 arraytext = None
1878 arraytext = None
1873 else:
1879 else:
1874 text = m.text()
1880 text = m.text()
1875 rev = self._revlog.addrevision(text, transaction, link, p1, p2)
1881 rev = self._revlog.addrevision(text, transaction, link, p1, p2)
1876 n = self._revlog.node(rev)
1882 n = self._revlog.node(rev)
1877 arraytext = bytearray(text)
1883 arraytext = bytearray(text)
1878
1884
1879 if arraytext is not None:
1885 if arraytext is not None:
1880 self.fulltextcache[n] = arraytext
1886 self.fulltextcache[n] = arraytext
1881
1887
1882 return n
1888 return n
1883
1889
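add() first attempts the cheap delta path, raising FastdeltaUnavailable to fall through to a fulltext write, and merges the sorted added/removed lists with heapq.merge. A rough, self-contained model of that control flow (names are illustrative, not Mercurial's API):

import heapq

class FastdeltaUnavailableDemo(Exception):
    pass

def store_revision(cache, p1, new_text):
    try:
        if p1 not in cache:
            raise FastdeltaUnavailableDemo()
        # fast path: delta against the cached parent fulltext
        payload = (b'delta-against', p1)
    except FastdeltaUnavailableDemo:
        # slow path: hand the fulltext to the storage layer
        payload = (b'fulltext', new_text)
    cache[b'new-node'] = new_text  # keep the cache warm for the next write
    return payload

assert store_revision({b'p1': b'old'}, b'p1', b'new')[0] == b'delta-against'
assert store_revision({}, b'p1', b'new')[0] == b'fulltext'

# the sorted added/removed lists become one ordered work queue:
work = list(heapq.merge([(b'a', False)], [(b'b', True)]))
assert work == [(b'a', False), (b'b', True)]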
1884 def _addtree(self, m, transaction, link, m1, m2, readtree, match):
1890 def _addtree(self, m, transaction, link, m1, m2, readtree, match):
1885 # If the manifest is unchanged compared to one parent,
1891 # If the manifest is unchanged compared to one parent,
1886 # don't write a new revision
1892 # don't write a new revision
1887 if self.tree != b'' and (
1893 if self.tree != b'' and (
1888 m.unmodifiedsince(m1) or m.unmodifiedsince(m2)
1894 m.unmodifiedsince(m1) or m.unmodifiedsince(m2)
1889 ):
1895 ):
1890 return m.node()
1896 return m.node()
1891
1897
1892 def writesubtree(subm, subp1, subp2, match):
1898 def writesubtree(subm, subp1, subp2, match):
1893 sublog = self.dirlog(subm.dir())
1899 sublog = self.dirlog(subm.dir())
1894 sublog.add(
1900 sublog.add(
1895 subm,
1901 subm,
1896 transaction,
1902 transaction,
1897 link,
1903 link,
1898 subp1,
1904 subp1,
1899 subp2,
1905 subp2,
1900 None,
1906 None,
1901 None,
1907 None,
1902 readtree=readtree,
1908 readtree=readtree,
1903 match=match,
1909 match=match,
1904 )
1910 )
1905
1911
1906 m.writesubtrees(m1, m2, writesubtree, match)
1912 m.writesubtrees(m1, m2, writesubtree, match)
1907 text = m.dirtext()
1913 text = m.dirtext()
1908 n = None
1914 n = None
1909 if self.tree != b'':
1915 if self.tree != b'':
1910 # Double-check whether contents are unchanged relative to one parent
1916 # Double-check whether contents are unchanged relative to one parent
1911 if text == m1.dirtext():
1917 if text == m1.dirtext():
1912 n = m1.node()
1918 n = m1.node()
1913 elif text == m2.dirtext():
1919 elif text == m2.dirtext():
1914 n = m2.node()
1920 n = m2.node()
1915
1921
1916 if not n:
1922 if not n:
1917 rev = self._revlog.addrevision(
1923 rev = self._revlog.addrevision(
1918 text, transaction, link, m1.node(), m2.node()
1924 text, transaction, link, m1.node(), m2.node()
1919 )
1925 )
1920 n = self._revlog.node(rev)
1926 n = self._revlog.node(rev)
1921
1927
1922 # Save nodeid so parent manifest can calculate its nodeid
1928 # Save nodeid so parent manifest can calculate its nodeid
1923 m.setnode(n)
1929 m.setnode(n)
1924 return n
1930 return n
1925
1931
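_addtree() avoids writing a new revision when the directory text is byte-identical to a parent's. A hedged distillation of that check, using plain functions rather than Mercurial's API:

def choose_node(text, p1_text, p1_node, p2_text, p2_node):
    # reuse a parent's node when the content is byte-identical
    if text == p1_text:
        return p1_node
    if text == p2_text:
        return p2_node
    return None  # caller must write a new revision

assert choose_node(b'a\n', b'a\n', b'n1', b'b\n', b'n2') == b'n1'
assert choose_node(b'b\n', b'a\n', b'n1', b'b\n', b'n2') == b'n2'
assert choose_node(b'c\n', b'a\n', b'n1', b'b\n', b'n2') is None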
1926 def __len__(self):
1932 def __len__(self):
1927 return len(self._revlog)
1933 return len(self._revlog)
1928
1934
1929 def __iter__(self):
1935 def __iter__(self):
1930 return self._revlog.__iter__()
1936 return self._revlog.__iter__()
1931
1937
1932 def rev(self, node):
1938 def rev(self, node):
1933 return self._revlog.rev(node)
1939 return self._revlog.rev(node)
1934
1940
1935 def node(self, rev):
1941 def node(self, rev):
1936 return self._revlog.node(rev)
1942 return self._revlog.node(rev)
1937
1943
1938 def lookup(self, value):
1944 def lookup(self, value):
1939 return self._revlog.lookup(value)
1945 return self._revlog.lookup(value)
1940
1946
1941 def parentrevs(self, rev):
1947 def parentrevs(self, rev):
1942 return self._revlog.parentrevs(rev)
1948 return self._revlog.parentrevs(rev)
1943
1949
1944 def parents(self, node):
1950 def parents(self, node):
1945 return self._revlog.parents(node)
1951 return self._revlog.parents(node)
1946
1952
1947 def linkrev(self, rev):
1953 def linkrev(self, rev):
1948 return self._revlog.linkrev(rev)
1954 return self._revlog.linkrev(rev)
1949
1955
1950 def checksize(self):
1956 def checksize(self):
1951 return self._revlog.checksize()
1957 return self._revlog.checksize()
1952
1958
1953 def revision(self, node):
1959 def revision(self, node):
1954 return self._revlog.revision(node)
1960 return self._revlog.revision(node)
1955
1961
1956 def rawdata(self, node):
1962 def rawdata(self, node):
1957 return self._revlog.rawdata(node)
1963 return self._revlog.rawdata(node)
1958
1964
1959 def revdiff(self, rev1, rev2):
1965 def revdiff(self, rev1, rev2):
1960 return self._revlog.revdiff(rev1, rev2)
1966 return self._revlog.revdiff(rev1, rev2)
1961
1967
1962 def cmp(self, node, text):
1968 def cmp(self, node, text):
1963 return self._revlog.cmp(node, text)
1969 return self._revlog.cmp(node, text)
1964
1970
1965 def deltaparent(self, rev):
1971 def deltaparent(self, rev):
1966 return self._revlog.deltaparent(rev)
1972 return self._revlog.deltaparent(rev)
1967
1973
1968 def emitrevisions(
1974 def emitrevisions(
1969 self,
1975 self,
1970 nodes,
1976 nodes,
1971 nodesorder=None,
1977 nodesorder=None,
1972 revisiondata=False,
1978 revisiondata=False,
1973 assumehaveparentrevisions=False,
1979 assumehaveparentrevisions=False,
1974 deltamode=repository.CG_DELTAMODE_STD,
1980 deltamode=repository.CG_DELTAMODE_STD,
1975 sidedata_helpers=None,
1981 sidedata_helpers=None,
1976 debug_info=None,
1982 debug_info=None,
1977 ):
1983 ):
1978 return self._revlog.emitrevisions(
1984 return self._revlog.emitrevisions(
1979 nodes,
1985 nodes,
1980 nodesorder=nodesorder,
1986 nodesorder=nodesorder,
1981 revisiondata=revisiondata,
1987 revisiondata=revisiondata,
1982 assumehaveparentrevisions=assumehaveparentrevisions,
1988 assumehaveparentrevisions=assumehaveparentrevisions,
1983 deltamode=deltamode,
1989 deltamode=deltamode,
1984 sidedata_helpers=sidedata_helpers,
1990 sidedata_helpers=sidedata_helpers,
1985 debug_info=debug_info,
1991 debug_info=debug_info,
1986 )
1992 )
1987
1993
1988 def addgroup(
1994 def addgroup(
1989 self,
1995 self,
1990 deltas,
1996 deltas,
1991 linkmapper,
1997 linkmapper,
1992 transaction,
1998 transaction,
1993 alwayscache=False,
1999 alwayscache=False,
1994 addrevisioncb=None,
2000 addrevisioncb=None,
1995 duplicaterevisioncb=None,
2001 duplicaterevisioncb=None,
1996 debug_info=None,
2002 debug_info=None,
1997 delta_base_reuse_policy=None,
2003 delta_base_reuse_policy=None,
1998 ):
2004 ):
1999 return self._revlog.addgroup(
2005 return self._revlog.addgroup(
2000 deltas,
2006 deltas,
2001 linkmapper,
2007 linkmapper,
2002 transaction,
2008 transaction,
2003 alwayscache=alwayscache,
2009 alwayscache=alwayscache,
2004 addrevisioncb=addrevisioncb,
2010 addrevisioncb=addrevisioncb,
2005 duplicaterevisioncb=duplicaterevisioncb,
2011 duplicaterevisioncb=duplicaterevisioncb,
2006 debug_info=debug_info,
2012 debug_info=debug_info,
2007 delta_base_reuse_policy=delta_base_reuse_policy,
2013 delta_base_reuse_policy=delta_base_reuse_policy,
2008 )
2014 )
2009
2015
2010 def rawsize(self, rev):
2016 def rawsize(self, rev):
2011 return self._revlog.rawsize(rev)
2017 return self._revlog.rawsize(rev)
2012
2018
2013 def getstrippoint(self, minlink):
2019 def getstrippoint(self, minlink):
2014 return self._revlog.getstrippoint(minlink)
2020 return self._revlog.getstrippoint(minlink)
2015
2021
2016 def strip(self, minlink, transaction):
2022 def strip(self, minlink, transaction):
2017 return self._revlog.strip(minlink, transaction)
2023 return self._revlog.strip(minlink, transaction)
2018
2024
2019 def files(self):
2025 def files(self):
2020 return self._revlog.files()
2026 return self._revlog.files()
2021
2027
2022 def clone(self, tr, destrevlog, **kwargs):
2028 def clone(self, tr, destrevlog, **kwargs):
2023 if not isinstance(destrevlog, manifestrevlog):
2029 if not isinstance(destrevlog, manifestrevlog):
2024 raise error.ProgrammingError(b'expected manifestrevlog to clone()')
2030 raise error.ProgrammingError(b'expected manifestrevlog to clone()')
2025
2031
2026 return self._revlog.clone(tr, destrevlog._revlog, **kwargs)
2032 return self._revlog.clone(tr, destrevlog._revlog, **kwargs)
2027
2033
2028 def storageinfo(
2034 def storageinfo(
2029 self,
2035 self,
2030 exclusivefiles=False,
2036 exclusivefiles=False,
2031 sharedfiles=False,
2037 sharedfiles=False,
2032 revisionscount=False,
2038 revisionscount=False,
2033 trackedsize=False,
2039 trackedsize=False,
2034 storedsize=False,
2040 storedsize=False,
2035 ):
2041 ):
2036 return self._revlog.storageinfo(
2042 return self._revlog.storageinfo(
2037 exclusivefiles=exclusivefiles,
2043 exclusivefiles=exclusivefiles,
2038 sharedfiles=sharedfiles,
2044 sharedfiles=sharedfiles,
2039 revisionscount=revisionscount,
2045 revisionscount=revisionscount,
2040 trackedsize=trackedsize,
2046 trackedsize=trackedsize,
2041 storedsize=storedsize,
2047 storedsize=storedsize,
2042 )
2048 )
2043
2049
2044 @property
2050 @property
2045 def opener(self):
2051 def opener(self):
2046 return self._revlog.opener
2052 return self._revlog.opener
2047
2053
2048 @opener.setter
2054 @opener.setter
2049 def opener(self, value):
2055 def opener(self, value):
2050 self._revlog.opener = value
2056 self._revlog.opener = value
2051
2057
2052
2058
2053 manifestrevlog = interfaceutil.implementer(repository.imanifeststorage)(
2059 manifestrevlog = interfaceutil.implementer(repository.imanifeststorage)(
2054 ManifestRevlog
2060 ManifestRevlog
2055 )
2061 )
2056
2062
2057 if typing.TYPE_CHECKING:
2063 if typing.TYPE_CHECKING:
2058 manifestrevlog = ManifestRevlog
2064 manifestrevlog = ManifestRevlog
2059
2065
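This runtime-wrapper-plus-typing-alias idiom is the heart of the pytype gains this change locks in: at runtime the name is the interface-decorated class, while type checkers see the concrete class and its annotations directly. A minimal sketch, assuming a decorator-shaped implementer; the stand-ins below are not Mercurial's interfaceutil:

import typing

def implementer(iface):
    def decorate(cls):
        return cls  # real code may record interface metadata here
    return decorate

class IStorage:  # hypothetical interface
    pass

class Storage:
    def rev(self, node: bytes) -> int:
        return 0

storage = implementer(IStorage)(Storage)

if typing.TYPE_CHECKING:
    # let pytype/mypy see the concrete class, which the runtime
    # wrapper could otherwise hide
    storage = Storage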
2060 AnyManifestCtx = Union['ManifestCtx', 'TreeManifestCtx']
2066 AnyManifestCtx = Union['ManifestCtx', 'TreeManifestCtx']
2061 AnyManifestDict = Union[ManifestDict, TreeManifest]
2067 AnyManifestDict = Union[ManifestDict, TreeManifest]
2062
2068
2063
2069
2064 class ManifestLog:
2070 class ManifestLog:
2065 """A collection class representing the collection of manifest snapshots
2071 """A collection class representing the collection of manifest snapshots
2066 referenced by commits in the repository.
2072 referenced by commits in the repository.
2067
2073
2068 In this situation, 'manifest' refers to the abstract concept of a snapshot
2074 In this situation, 'manifest' refers to the abstract concept of a snapshot
2069 of the list of files in the given commit. Consumers of the output of this
2075 of the list of files in the given commit. Consumers of the output of this
2070 class do not care about the implementation details of the actual manifests
2076 class do not care about the implementation details of the actual manifests
2071 they receive (i.e. tree or flat or lazily loaded, etc)."""
2077 they receive (i.e. tree or flat or lazily loaded, etc)."""
2072
2078
2073 def __init__(self, opener, repo, rootstore, narrowmatch):
2079 def __init__(self, opener, repo, rootstore, narrowmatch):
2074 self.nodeconstants = repo.nodeconstants
2080 self.nodeconstants = repo.nodeconstants
2075 usetreemanifest = False
2081 usetreemanifest = False
2076 cachesize = 4
2082 cachesize = 4
2077
2083
2078 opts = getattr(opener, 'options', None)
2084 opts = getattr(opener, 'options', None)
2079 if opts is not None:
2085 if opts is not None:
2080 usetreemanifest = opts.get(b'treemanifest', usetreemanifest)
2086 usetreemanifest = opts.get(b'treemanifest', usetreemanifest)
2081 cachesize = opts.get(b'manifestcachesize', cachesize)
2087 cachesize = opts.get(b'manifestcachesize', cachesize)
2082
2088
2083 self._treemanifests = usetreemanifest
2089 self._treemanifests = usetreemanifest
2084
2090
2085 self._rootstore = rootstore
2091 self._rootstore = rootstore
2086 self._rootstore._setupmanifestcachehooks(repo)
2092 self._rootstore._setupmanifestcachehooks(repo)
2087 self._narrowmatch = narrowmatch
2093 self._narrowmatch = narrowmatch
2088
2094
2089 # A cache of the manifestctx or treemanifestctx for each directory
2095 # A cache of the manifestctx or treemanifestctx for each directory
2090 self._dirmancache = {}
2096 self._dirmancache = {}
2091 self._dirmancache[b''] = util.lrucachedict(cachesize)
2097 self._dirmancache[b''] = util.lrucachedict(cachesize)
2092
2098
2093 self._cachesize = cachesize
2099 self._cachesize = cachesize
2094
2100
2095 def __getitem__(self, node):
2101 def __getitem__(self, node):
2096 """Retrieves the manifest instance for the given node. Throws a
2102 """Retrieves the manifest instance for the given node. Throws a
2097 LookupError if not found.
2103 LookupError if not found.
2098 """
2104 """
2099 return self.get(b'', node)
2105 return self.get(b'', node)
2100
2106
2101 @property
2107 @property
2102 def narrowed(self):
2108 def narrowed(self):
2103 return not (self._narrowmatch is None or self._narrowmatch.always())
2109 return not (self._narrowmatch is None or self._narrowmatch.always())
2104
2110
2105 def get(
2111 def get(
2106 self, tree: bytes, node: bytes, verify: bool = True
2112 self, tree: bytes, node: bytes, verify: bool = True
2107 ) -> AnyManifestCtx:
2113 ) -> AnyManifestCtx:
2108 """Retrieves the manifest instance for the given node. Throws a
2114 """Retrieves the manifest instance for the given node. Throws a
2109 LookupError if not found.
2115 LookupError if not found.
2110
2116
2111 `verify` - if True, an exception will be thrown if the node is not in
2117 `verify` - if True, an exception will be thrown if the node is not in
2112 the revlog
2118 the revlog
2113 """
2119 """
2114 if node in self._dirmancache.get(tree, ()):
2120 if node in self._dirmancache.get(tree, ()):
2115 return self._dirmancache[tree][node]
2121 return self._dirmancache[tree][node]
2116
2122
2117 if not self._narrowmatch.always():
2123 if not self._narrowmatch.always():
2118 if not self._narrowmatch.visitdir(tree[:-1]):
2124 if not self._narrowmatch.visitdir(tree[:-1]):
2119 return excludeddirmanifestctx(self.nodeconstants, tree, node)
2125 return excludeddirmanifestctx(self.nodeconstants, tree, node)
2120 if tree:
2126 if tree:
2121 if self._rootstore._treeondisk:
2127 if self._rootstore._treeondisk:
2122 if verify:
2128 if verify:
2123 # Side effect: LookupError is raised if the node doesn't
2129 # Side effect: LookupError is raised if the node doesn't
2124 # exist.
2130 # exist.
2125 self.getstorage(tree).rev(node)
2131 self.getstorage(tree).rev(node)
2126
2132
2127 m = treemanifestctx(self, tree, node)
2133 m = treemanifestctx(self, tree, node)
2128 else:
2134 else:
2129 raise error.Abort(
2135 raise error.Abort(
2130 _(
2136 _(
2131 b"cannot ask for manifest directory '%s' in a flat "
2137 b"cannot ask for manifest directory '%s' in a flat "
2132 b"manifest"
2138 b"manifest"
2133 )
2139 )
2134 % tree
2140 % tree
2135 )
2141 )
2136 else:
2142 else:
2137 if verify:
2143 if verify:
2138 # Side effect: LookupError is raised if the node doesn't exist.
2144 # Side effect: LookupError is raised if the node doesn't exist.
2139 self._rootstore.rev(node)
2145 self._rootstore.rev(node)
2140
2146
2141 if self._treemanifests:
2147 if self._treemanifests:
2142 m = treemanifestctx(self, b'', node)
2148 m = treemanifestctx(self, b'', node)
2143 else:
2149 else:
2144 m = manifestctx(self, node)
2150 m = manifestctx(self, node)
2145
2151
2146 if node != self.nodeconstants.nullid:
2152 if node != self.nodeconstants.nullid:
2147 mancache = self._dirmancache.get(tree)
2153 mancache = self._dirmancache.get(tree)
2148 if not mancache:
2154 if not mancache:
2149 mancache = util.lrucachedict(self._cachesize)
2155 mancache = util.lrucachedict(self._cachesize)
2150 self._dirmancache[tree] = mancache
2156 self._dirmancache[tree] = mancache
2151 mancache[node] = m
2157 mancache[node] = m
2152 return m
2158 return m
2153
2159
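get() caches contexts in a per-tree LRU keyed by node. The sketch below models that two-level structure with an OrderedDict-based LRU standing in for Mercurial's util.lrucachedict:

from collections import OrderedDict

class TinyLRU(OrderedDict):
    def __init__(self, maxsize):
        super().__init__()
        self.maxsize = maxsize

    def __setitem__(self, key, value):
        super().__setitem__(key, value)
        self.move_to_end(key)
        if len(self) > self.maxsize:
            self.popitem(last=False)  # evict the oldest entry

dirmancache = {}

def cache_ctx(tree, node, ctx, cachesize=4):
    mancache = dirmancache.get(tree)
    if mancache is None:
        mancache = TinyLRU(cachesize)
        dirmancache[tree] = mancache
    mancache[node] = ctx

cache_ctx(b'', b'node1', 'ctx1')
assert dirmancache[b''][b'node1'] == 'ctx1'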
2154 def getstorage(self, tree):
2160 def getstorage(self, tree):
2155 return self._rootstore.dirlog(tree)
2161 return self._rootstore.dirlog(tree)
2156
2162
2157 def clearcaches(self, clear_persisted_data: bool = False) -> None:
2163 def clearcaches(self, clear_persisted_data: bool = False) -> None:
2158 self._dirmancache.clear()
2164 self._dirmancache.clear()
2159 self._rootstore.clearcaches(clear_persisted_data=clear_persisted_data)
2165 self._rootstore.clearcaches(clear_persisted_data=clear_persisted_data)
2160
2166
2161 def rev(self, node) -> int:
2167 def rev(self, node) -> int:
2162 return self._rootstore.rev(node)
2168 return self._rootstore.rev(node)
2163
2169
2164 def update_caches(self, transaction) -> None:
2170 def update_caches(self, transaction) -> None:
2165 return self._rootstore._revlog.update_caches(transaction=transaction)
2171 return self._rootstore._revlog.update_caches(transaction=transaction)
2166
2172
2167
2173
2168 manifestlog = interfaceutil.implementer(repository.imanifestlog)(ManifestLog)
2174 manifestlog = interfaceutil.implementer(repository.imanifestlog)(ManifestLog)
2169
2175
2170 if typing.TYPE_CHECKING:
2176 if typing.TYPE_CHECKING:
2171 manifestlog = ManifestLog
2177 manifestlog = ManifestLog
2172
2178
2173
2179
2174 class MemManifestCtx:
2180 class MemManifestCtx:
2181 _manifestdict: ManifestDict
2182
2175 def __init__(self, manifestlog):
2183 def __init__(self, manifestlog):
2176 self._manifestlog = manifestlog
2184 self._manifestlog = manifestlog
2177 self._manifestdict = manifestdict(manifestlog.nodeconstants.nodelen)
2185 self._manifestdict = manifestdict(manifestlog.nodeconstants.nodelen)
2178
2186
2179 def _storage(self) -> ManifestRevlog:
2187 def _storage(self) -> ManifestRevlog:
2180 return self._manifestlog.getstorage(b'')
2188 return self._manifestlog.getstorage(b'')
2181
2189
2182 def copy(self) -> 'MemManifestCtx':
2190 def copy(self) -> 'MemManifestCtx':
2183 memmf = memmanifestctx(self._manifestlog)
2191 memmf = memmanifestctx(self._manifestlog)
2184 memmf._manifestdict = self.read().copy()
2192 memmf._manifestdict = self.read().copy()
2185 return memmf
2193 return memmf
2186
2194
2187 def read(self) -> 'ManifestDict':
2195 def read(self) -> 'ManifestDict':
2188 return self._manifestdict
2196 return self._manifestdict
2189
2197
2190 def write(self, transaction, link, p1, p2, added, removed, match=None):
2198 def write(self, transaction, link, p1, p2, added, removed, match=None):
2191 return self._storage().add(
2199 return self._storage().add(
2192 self._manifestdict,
2200 self._manifestdict,
2193 transaction,
2201 transaction,
2194 link,
2202 link,
2195 p1,
2203 p1,
2196 p2,
2204 p2,
2197 added,
2205 added,
2198 removed,
2206 removed,
2199 match=match,
2207 match=match,
2200 )
2208 )
2201
2209
2202
2210
2203 memmanifestctx = interfaceutil.implementer(
2211 memmanifestctx = interfaceutil.implementer(
2204 repository.imanifestrevisionwritable
2212 repository.imanifestrevisionwritable
2205 )(MemManifestCtx)
2213 )(MemManifestCtx)
2206
2214
2207 if typing.TYPE_CHECKING:
2215 if typing.TYPE_CHECKING:
2208 memmanifestctx = MemManifestCtx
2216 memmanifestctx = MemManifestCtx
2209
2217
2210
2218
2211 class ManifestCtx:
2219 class ManifestCtx:
2212 """A class representing a single revision of a manifest, including its
2220 """A class representing a single revision of a manifest, including its
2213 contents, its parent revs, and its linkrev.
2221 contents, its parent revs, and its linkrev.
2214 """
2222 """
2215
2223
2224 _data: Optional[ManifestDict]
2225
2216 def __init__(self, manifestlog, node):
2226 def __init__(self, manifestlog, node):
2217 self._manifestlog = manifestlog
2227 self._manifestlog = manifestlog
2218 self._data = None
2228 self._data = None
2219
2229
2220 self._node = node
2230 self._node = node
2221
2231
2222 # TODO: We eventually want p1, p2, and linkrev exposed on this class,
2232 # TODO: We eventually want p1, p2, and linkrev exposed on this class,
2223 # but let's add it later when something needs it and we can load it
2233 # but let's add it later when something needs it and we can load it
2224 # lazily.
2234 # lazily.
2225 # self.p1, self.p2 = store.parents(node)
2235 # self.p1, self.p2 = store.parents(node)
2226 # rev = store.rev(node)
2236 # rev = store.rev(node)
2227 # self.linkrev = store.linkrev(rev)
2237 # self.linkrev = store.linkrev(rev)
2228
2238
2229 def _storage(self) -> 'ManifestRevlog':
2239 def _storage(self) -> 'ManifestRevlog':
2230 return self._manifestlog.getstorage(b'')
2240 return self._manifestlog.getstorage(b'')
2231
2241
2232 def node(self) -> bytes:
2242 def node(self) -> bytes:
2233 return self._node
2243 return self._node
2234
2244
2235 def copy(self) -> MemManifestCtx:
2245 def copy(self) -> MemManifestCtx:
2236 memmf = memmanifestctx(self._manifestlog)
2246 memmf = memmanifestctx(self._manifestlog)
2237 memmf._manifestdict = self.read().copy()
2247 memmf._manifestdict = self.read().copy()
2238 return memmf
2248 return memmf
2239
2249
2240 @propertycache
2250 @propertycache
2241 def parents(self) -> Tuple[bytes, bytes]:
2251 def parents(self) -> Tuple[bytes, bytes]:
2242 return self._storage().parents(self._node)
2252 return self._storage().parents(self._node)
2243
2253
2244 def read(self) -> 'ManifestDict':
2254 def read(self) -> 'ManifestDict':
2245 if self._data is None:
2255 if self._data is None:
2246 nc = self._manifestlog.nodeconstants
2256 nc = self._manifestlog.nodeconstants
2247 if self._node == nc.nullid:
2257 if self._node == nc.nullid:
2248 self._data = manifestdict(nc.nodelen)
2258 self._data = manifestdict(nc.nodelen)
2249 else:
2259 else:
2250 store = self._storage()
2260 store = self._storage()
2251 if self._node in store.fulltextcache:
2261 if self._node in store.fulltextcache:
2252 text = pycompat.bytestr(store.fulltextcache[self._node])
2262 text = pycompat.bytestr(store.fulltextcache[self._node])
2253 else:
2263 else:
2254 text = store.revision(self._node)
2264 text = store.revision(self._node)
2255 arraytext = bytearray(text)
2265 arraytext = bytearray(text)
2256 store.fulltextcache[self._node] = arraytext
2266 store.fulltextcache[self._node] = arraytext
2257 self._data = manifestdict(nc.nodelen, text)
2267 self._data = manifestdict(nc.nodelen, text)
2258 return self._data
2268 return self._data
2259
2269
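read() is a classic read-through cache: serve from fulltextcache when possible, otherwise load the revision and populate the cache on the way out. A self-contained sketch, with Store as a hypothetical stand-in:

class Store:
    def __init__(self):
        self.fulltextcache = {}

    def revision(self, node):
        return b'file1\0aaaa\n'  # pretend this is an expensive revlog read

def read_text(store, node):
    if node in store.fulltextcache:
        return bytes(store.fulltextcache[node])
    text = store.revision(node)
    store.fulltextcache[node] = bytearray(text)  # warm the cache
    return text

store = Store()
assert read_text(store, b'n') == b'file1\0aaaa\n'
assert b'n' in store.fulltextcache  # the second read hits the cache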
2260 def readfast(self, shallow: bool = False) -> 'ManifestDict':
2270 def readfast(self, shallow: bool = False) -> 'ManifestDict':
2261 """Calls either readdelta or read, based on which would be less work.
2271 """Calls either readdelta or read, based on which would be less work.
2262 readdelta is called if the delta is against the p1, and therefore can be
2272 readdelta is called if the delta is against the p1, and therefore can be
2263 read quickly.
2273 read quickly.
2264
2274
2265 If `shallow` is True, nothing changes since this is a flat manifest.
2275 If `shallow` is True, nothing changes since this is a flat manifest.
2266 """
2276 """
2267 util.nouideprecwarn(
2277 util.nouideprecwarn(
2268 b'"readfast" is deprecated use "read_any_fast_delta" or "read_delta_parents"',
2278 b'"readfast" is deprecated use "read_any_fast_delta" or "read_delta_parents"',
2269 b"6.9",
2279 b"6.9",
2270 stacklevel=2,
2280 stacklevel=2,
2271 )
2281 )
2272 store = self._storage()
2282 store = self._storage()
2273 r = store.rev(self._node)
2283 r = store.rev(self._node)
2274 deltaparent = store.deltaparent(r)
2284 deltaparent = store.deltaparent(r)
2275 if deltaparent != nullrev and deltaparent in store.parentrevs(r):
2285 if deltaparent != nullrev and deltaparent in store.parentrevs(r):
2276 return self.readdelta()
2286 return self.readdelta()
2277 return self.read()
2287 return self.read()
2278
2288
2279 def readdelta(self, shallow: bool = False) -> 'ManifestDict':
2289 def readdelta(self, shallow: bool = False) -> 'ManifestDict':
2280 """Returns a manifest containing just the entries that are present
2290 """Returns a manifest containing just the entries that are present
2281 in this manifest, but not in its p1 manifest. This is efficient to read
2291 in this manifest, but not in its p1 manifest. This is efficient to read
2282 if the revlog delta is already p1.
2292 if the revlog delta is already p1.
2283
2293
2284 Changing the value of `shallow` has no effect on flat manifests.
2294 Changing the value of `shallow` has no effect on flat manifests.
2285 """
2295 """
2286 util.nouideprecwarn(
2296 util.nouideprecwarn(
2287 b'"readfast" is deprecated use "read_any_fast_delta" or "read_delta_new_entries"',
2297 b'"readfast" is deprecated use "read_any_fast_delta" or "read_delta_new_entries"',
2288 b"6.9",
2298 b"6.9",
2289 stacklevel=2,
2299 stacklevel=2,
2290 )
2300 )
2291 store = self._storage()
2301 store = self._storage()
2292 r = store.rev(self._node)
2302 r = store.rev(self._node)
2293 d = mdiff.patchtext(store.revdiff(store.deltaparent(r), r))
2303 d = mdiff.patchtext(store.revdiff(store.deltaparent(r), r))
2294 return manifestdict(store.nodeconstants.nodelen, d)
2304 return manifestdict(store.nodeconstants.nodelen, d)
2295
2305
2296 def read_any_fast_delta(
2306 def read_any_fast_delta(
2297 self,
2307 self,
2298 valid_bases: Optional[Collection[int]] = None,
2308 valid_bases: Optional[Collection[int]] = None,
2299 *,
2309 *,
2300 shallow: bool = False,
2310 shallow: bool = False,
2301 ) -> Tuple[Optional[int], ManifestDict]:
2311 ) -> Tuple[Optional[int], ManifestDict]:
2302 """see `imanifestrevisionstored` documentation"""
2312 """see `imanifestrevisionstored` documentation"""
2303 store = self._storage()
2313 store = self._storage()
2304 r = store.rev(self._node)
2314 r = store.rev(self._node)
2305 deltaparent = store.deltaparent(r)
2315 deltaparent = store.deltaparent(r)
2306 if valid_bases is None:
2316 if valid_bases is None:
2307 # make sure the next check is True
2317 # make sure the next check is True
2308 valid_bases = (deltaparent,)
2318 valid_bases = (deltaparent,)
2309 if deltaparent != nullrev and deltaparent in valid_bases:
2319 if deltaparent != nullrev and deltaparent in valid_bases:
2310 d = mdiff.patchtext(store.revdiff(deltaparent, r))
2320 d = mdiff.patchtext(store.revdiff(deltaparent, r))
2311 return (
2321 return (
2312 deltaparent,
2322 deltaparent,
2313 manifestdict(store.nodeconstants.nodelen, d),
2323 manifestdict(store.nodeconstants.nodelen, d),
2314 )
2324 )
2315 return (None, self.read())
2325 return (None, self.read())
2316
2326
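The decision above reduces to: use the stored delta only when its base is a real revision the caller can reconstruct against. Distilled with illustrative names, not Mercurial's API:

NULLREV = -1

def pick_read_path(deltaparent, valid_bases=None):
    if valid_bases is None:
        valid_bases = (deltaparent,)  # make the next check always true
    if deltaparent != NULLREV and deltaparent in valid_bases:
        return ('delta', deltaparent)
    return ('full', None)

assert pick_read_path(5) == ('delta', 5)            # base defaults to itself
assert pick_read_path(5, valid_bases=[3]) == ('full', None)
assert pick_read_path(NULLREV) == ('full', None)    # never delta against null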
2317 def read_delta_parents(
2327 def read_delta_parents(
2318 self,
2328 self,
2319 *,
2329 *,
2320 shallow: bool = False,
2330 shallow: bool = False,
2321 exact: bool = True,
2331 exact: bool = True,
2322 ) -> ManifestDict:
2332 ) -> ManifestDict:
2323 """see `interface.imanifestrevisionbase` documentations"""
2333 """see `interface.imanifestrevisionbase` documentations"""
2324 store = self._storage()
2334 store = self._storage()
2325 r = store.rev(self._node)
2335 r = store.rev(self._node)
2326 deltaparent = store.deltaparent(r)
2336 deltaparent = store.deltaparent(r)
2327 parents = [p for p in store.parentrevs(r) if p is not nullrev]
2337 parents = [p for p in store.parentrevs(r) if p is not nullrev]
2328 if not exact and deltaparent in parents:
2338 if not exact and deltaparent in parents:
2329 d = mdiff.patchtext(store.revdiff(store.deltaparent(r), r))
2339 d = mdiff.patchtext(store.revdiff(store.deltaparent(r), r))
2330 return manifestdict(store.nodeconstants.nodelen, d)
2340 return manifestdict(store.nodeconstants.nodelen, d)
2331 elif not exact or len(parents) == 0:
2341 elif not exact or len(parents) == 0:
2332 return self.read()
2342 return self.read()
2333 elif len(parents) == 1:
2343 elif len(parents) == 1:
2334 p = parents[0]
2344 p = parents[0]
2335 d = mdiff.patchtext(store.revdiff(p, r))
2345 d = mdiff.patchtext(store.revdiff(p, r))
2336 return manifestdict(store.nodeconstants.nodelen, d)
2346 return manifestdict(store.nodeconstants.nodelen, d)
2337 else:
2347 else:
2338 p1, p2 = parents
2348 p1, p2 = parents
2339 d1 = mdiff.patchtext(store.revdiff(p1, r))
2349 d1 = mdiff.patchtext(store.revdiff(p1, r))
2340 d2 = mdiff.patchtext(store.revdiff(p2, r))
2350 d2 = mdiff.patchtext(store.revdiff(p2, r))
2341 d1 = manifestdict(store.nodeconstants.nodelen, d1)
2351 d1 = manifestdict(store.nodeconstants.nodelen, d1)
2342 d2 = manifestdict(store.nodeconstants.nodelen, d2)
2352 d2 = manifestdict(store.nodeconstants.nodelen, d2)
2343 md = manifestdict(store.nodeconstants.nodelen)
2353 md = manifestdict(store.nodeconstants.nodelen)
2344 for f, new_node, new_flag in d1.iterentries():
2354 for f, new_node, new_flag in d1.iterentries():
2345 if f not in d2:
2355 if f not in d2:
2346 continue
2356 continue
2347 if new_node is not None:
2357 if new_node is not None:
2348 md.set(f, new_node, new_flag)
2358 md.set(f, new_node, new_flag)
2349 return md
2359 return md
2350
2360
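For merges, the exact variant intersects the deltas against each parent: a file counts as changed only if it changed relative to both p1 and p2. A plain-dict model of that branch (dicts stand in for manifestdict):

def changed_vs_both(d1, d2):
    md = {}
    for f, node in d1.items():
        if f not in d2:
            continue  # unchanged relative to the other parent
        if node is not None:
            md[f] = node
    return md

d1 = {b'a': b'n1', b'b': b'n2'}   # changes vs p1
d2 = {b'b': b'n2', b'c': b'n3'}   # changes vs p2
assert changed_vs_both(d1, d2) == {b'b': b'n2'}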
2351 def read_delta_new_entries(self, *, shallow=False) -> ManifestDict:
2361 def read_delta_new_entries(self, *, shallow=False) -> ManifestDict:
2352 """see `interface.imanifestrevisionbase` documentations"""
2362 """see `interface.imanifestrevisionbase` documentations"""
2353 # If we are using narrow, returning a delta against an arbitrary
2363 # If we are using narrow, returning a delta against an arbitrary
2354 # changeset might return files outside the narrowspec. This can create
2364 # changeset might return files outside the narrowspec. This can create
2355 # issues when running validation server side with strict security, as a
2365 # issues when running validation server side with strict security, as a
2356 # push from a low-privilege user might be seen as adding new revisions
2366 # push from a low-privilege user might be seen as adding new revisions
2357 # for files they cannot touch. So we are strict if narrow is involved.
2367 # for files they cannot touch. So we are strict if narrow is involved.
2358 if self._manifestlog.narrowed:
2368 if self._manifestlog.narrowed:
2359 return self.read_delta_parents(shallow=shallow, exact=True)
2369 return self.read_delta_parents(shallow=shallow, exact=True)
2360 store = self._storage()
2370 store = self._storage()
2361 r = store.rev(self._node)
2371 r = store.rev(self._node)
2362 d = mdiff.patchtext(store.revdiff(store.deltaparent(r), r))
2372 d = mdiff.patchtext(store.revdiff(store.deltaparent(r), r))
2363 return manifestdict(store.nodeconstants.nodelen, d)
2373 return manifestdict(store.nodeconstants.nodelen, d)
2364
2374
2365 def find(self, key: bytes) -> Tuple[bytes, bytes]:
2375 def find(self, key: bytes) -> Tuple[bytes, bytes]:
2366 return self.read().find(key)
2376 return self.read().find(key)
2367
2377
2368
2378
2369 manifestctx = interfaceutil.implementer(repository.imanifestrevisionstored)(
2379 manifestctx = interfaceutil.implementer(repository.imanifestrevisionstored)(
2370 ManifestCtx
2380 ManifestCtx
2371 )
2381 )
2372
2382
2373 if typing.TYPE_CHECKING:
2383 if typing.TYPE_CHECKING:
2374 manifestctx = ManifestCtx
2384 manifestctx = ManifestCtx
2375
2385
2376
2386
2377 class MemTreeManifestCtx:
2387 class MemTreeManifestCtx:
2388 _treemanifest: TreeManifest
2389
2378 def __init__(self, manifestlog, dir=b''):
2390 def __init__(self, manifestlog, dir=b''):
2379 self._manifestlog = manifestlog
2391 self._manifestlog = manifestlog
2380 self._dir = dir
2392 self._dir = dir
2381 self._treemanifest = treemanifest(manifestlog.nodeconstants)
2393 self._treemanifest = treemanifest(manifestlog.nodeconstants)
2382
2394
2383 def _storage(self) -> ManifestRevlog:
2395 def _storage(self) -> ManifestRevlog:
2384 return self._manifestlog.getstorage(b'')
2396 return self._manifestlog.getstorage(b'')
2385
2397
2386 def copy(self) -> 'MemTreeManifestCtx':
2398 def copy(self) -> 'MemTreeManifestCtx':
2387 memmf = memtreemanifestctx(self._manifestlog, dir=self._dir)
2399 memmf = memtreemanifestctx(self._manifestlog, dir=self._dir)
2388 memmf._treemanifest = self._treemanifest.copy()
2400 memmf._treemanifest = self._treemanifest.copy()
2389 return memmf
2401 return memmf
2390
2402
2391 def read(self) -> 'TreeManifest':
2403 def read(self) -> 'TreeManifest':
2392 return self._treemanifest
2404 return self._treemanifest
2393
2405
2394 def write(self, transaction, link, p1, p2, added, removed, match=None):
2406 def write(self, transaction, link, p1, p2, added, removed, match=None):
2395 def readtree(dir, node):
2407 def readtree(dir, node):
2396 return self._manifestlog.get(dir, node).read()
2408 return self._manifestlog.get(dir, node).read()
2397
2409
2398 return self._storage().add(
2410 return self._storage().add(
2399 self._treemanifest,
2411 self._treemanifest,
2400 transaction,
2412 transaction,
2401 link,
2413 link,
2402 p1,
2414 p1,
2403 p2,
2415 p2,
2404 added,
2416 added,
2405 removed,
2417 removed,
2406 readtree=readtree,
2418 readtree=readtree,
2407 match=match,
2419 match=match,
2408 )
2420 )
2409
2421
2410
2422
2411 memtreemanifestctx = interfaceutil.implementer(
2423 memtreemanifestctx = interfaceutil.implementer(
2412 repository.imanifestrevisionwritable
2424 repository.imanifestrevisionwritable
2413 )(MemTreeManifestCtx)
2425 )(MemTreeManifestCtx)
2414
2426
2415 if typing.TYPE_CHECKING:
2427 if typing.TYPE_CHECKING:
2416 memtreemanifestctx = MemTreeManifestCtx
2428 memtreemanifestctx = MemTreeManifestCtx
2417
2429
2418
2430
2419 class TreeManifestCtx:
2431 class TreeManifestCtx:
2432 _data: Optional[TreeManifest]
2433
2420 def __init__(self, manifestlog, dir, node):
2434 def __init__(self, manifestlog, dir, node):
2421 self._manifestlog = manifestlog
2435 self._manifestlog = manifestlog
2422 self._dir = dir
2436 self._dir = dir
2423 self._data = None
2437 self._data = None
2424
2438
2425 self._node = node
2439 self._node = node
2426
2440
2427 # TODO: Load p1/p2/linkrev lazily. They need to be lazily loaded so that
2441 # TODO: Load p1/p2/linkrev lazily. They need to be lazily loaded so that
2428 # we can instantiate treemanifestctx objects for directories we don't
2442 # we can instantiate treemanifestctx objects for directories we don't
2429 # have on disk.
2443 # have on disk.
2430 # self.p1, self.p2 = store.parents(node)
2444 # self.p1, self.p2 = store.parents(node)
2431 # rev = store.rev(node)
2445 # rev = store.rev(node)
2432 # self.linkrev = store.linkrev(rev)
2446 # self.linkrev = store.linkrev(rev)
2433
2447
2434 def _storage(self) -> ManifestRevlog:
2448 def _storage(self) -> ManifestRevlog:
2435 narrowmatch = self._manifestlog._narrowmatch
2449 narrowmatch = self._manifestlog._narrowmatch
2436 if not narrowmatch.always():
2450 if not narrowmatch.always():
2437 if not narrowmatch.visitdir(self._dir[:-1]):
2451 if not narrowmatch.visitdir(self._dir[:-1]):
2438 return excludedmanifestrevlog(
2452 return excludedmanifestrevlog(
2439 self._manifestlog.nodeconstants, self._dir
2453 self._manifestlog.nodeconstants, self._dir
2440 )
2454 )
2441 return self._manifestlog.getstorage(self._dir)
2455 return self._manifestlog.getstorage(self._dir)
2442
2456
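_storage() gates on the narrow matcher and hands back a refusing stand-in for excluded directories, so stray reads fail loudly instead of touching skipped revlogs. A rough model using a plain predicate instead of Mercurial's matcher API:

class ExcludedStorage:
    def __init__(self, d):
        self._dir = d

    def rev(self, node):
        raise RuntimeError('excluded dir %r' % self._dir)

def get_storage(d, visible, real_storage):
    if not visible(d):
        return ExcludedStorage(d)
    return real_storage

store = get_storage(b'secret/', lambda d: False, object())
try:
    store.rev(b'n')
except RuntimeError:
    pass  # reads from excluded dirs fail loudly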
2443 def read(self) -> 'TreeManifest':
2457 def read(self) -> 'TreeManifest':
2444 if self._data is None:
2458 if self._data is None:
2445 store = self._storage()
2459 store = self._storage()
2446 if self._node == self._manifestlog.nodeconstants.nullid:
2460 if self._node == self._manifestlog.nodeconstants.nullid:
2447 self._data = treemanifest(self._manifestlog.nodeconstants)
2461 self._data = treemanifest(self._manifestlog.nodeconstants)
2448 # TODO accessing non-public API
2462 # TODO accessing non-public API
2449 elif store._treeondisk:
2463 elif store._treeondisk:
2450 m = treemanifest(self._manifestlog.nodeconstants, dir=self._dir)
2464 m = treemanifest(self._manifestlog.nodeconstants, dir=self._dir)
2451
2465
2452 def gettext():
2466 def gettext():
2453 return store.revision(self._node)
2467 return store.revision(self._node)
2454
2468
2455 def readsubtree(dir, subm):
2469 def readsubtree(dir, subm):
2456 # Set verify to False since we need to be able to create
2470 # Set verify to False since we need to be able to create
2457 # subtrees for trees that don't exist on disk.
2471 # subtrees for trees that don't exist on disk.
2458 return self._manifestlog.get(dir, subm, verify=False).read()
2472 return self._manifestlog.get(dir, subm, verify=False).read()
2459
2473
2460 m.read(gettext, readsubtree)
2474 m.read(gettext, readsubtree)
2461 m.setnode(self._node)
2475 m.setnode(self._node)
2462 self._data = m
2476 self._data = m
2463 else:
2477 else:
2464 if self._node in store.fulltextcache:
2478 if self._node in store.fulltextcache:
2465 text = pycompat.bytestr(store.fulltextcache[self._node])
2479 text = pycompat.bytestr(store.fulltextcache[self._node])
2466 else:
2480 else:
2467 text = store.revision(self._node)
2481 text = store.revision(self._node)
2468 arraytext = bytearray(text)
2482 arraytext = bytearray(text)
2469 store.fulltextcache[self._node] = arraytext
2483 store.fulltextcache[self._node] = arraytext
2470 self._data = treemanifest(
2484 self._data = treemanifest(
2471 self._manifestlog.nodeconstants, dir=self._dir, text=text
2485 self._manifestlog.nodeconstants, dir=self._dir, text=text
2472 )
2486 )
2473
2487
2474 return self._data
2488 return self._data
2475
2489
2476 def node(self) -> bytes:
2490 def node(self) -> bytes:
2477 return self._node
2491 return self._node
2478
2492
2479 def copy(self) -> 'MemTreeManifestCtx':
2493 def copy(self) -> 'MemTreeManifestCtx':
2480 memmf = memtreemanifestctx(self._manifestlog, dir=self._dir)
2494 memmf = memtreemanifestctx(self._manifestlog, dir=self._dir)
2481 memmf._treemanifest = self.read().copy()
2495 memmf._treemanifest = self.read().copy()
2482 return memmf
2496 return memmf
2483
2497
2484 @propertycache
2498 @propertycache
2485 def parents(self) -> Tuple[bytes, bytes]:
2499 def parents(self) -> Tuple[bytes, bytes]:
2486 return self._storage().parents(self._node)
2500 return self._storage().parents(self._node)
2487
2501
2488 def readdelta(self, shallow: bool = False) -> AnyManifestDict:
2502 def readdelta(self, shallow: bool = False) -> AnyManifestDict:
2489 """see `imanifestrevisionstored` documentation"""
2503 """see `imanifestrevisionstored` documentation"""
2490 util.nouideprecwarn(
2504 util.nouideprecwarn(
2491 b'"readdelta" is deprecated use "read_any_fast_delta" or "read_delta_new_entries"',
2505 b'"readdelta" is deprecated use "read_any_fast_delta" or "read_delta_new_entries"',
2492 b"6.9",
2506 b"6.9",
2493 stacklevel=2,
2507 stacklevel=2,
2494 )
2508 )
2495 store = self._storage()
2509 store = self._storage()
2496 if shallow:
2510 if shallow:
2497 r = store.rev(self._node)
2511 r = store.rev(self._node)
2498 d = mdiff.patchtext(store.revdiff(store.deltaparent(r), r))
2512 d = mdiff.patchtext(store.revdiff(store.deltaparent(r), r))
2499 return manifestdict(store.nodeconstants.nodelen, d)
2513 return manifestdict(store.nodeconstants.nodelen, d)
2500 else:
2514 else:
2501 # Need to perform a slow delta
2515 # Need to perform a slow delta
2502 r0 = store.deltaparent(store.rev(self._node))
2516 r0 = store.deltaparent(store.rev(self._node))
2503 m0 = self._manifestlog.get(self._dir, store.node(r0)).read()
2517 m0 = self._manifestlog.get(self._dir, store.node(r0)).read()
2504 m1 = self.read()
2518 m1 = self.read()
2505 md = treemanifest(self._manifestlog.nodeconstants, dir=self._dir)
2519 md = treemanifest(self._manifestlog.nodeconstants, dir=self._dir)
2506 for f, ((n0, fl0), (n1, fl1)) in m0.diff(m1).items():
2520 for f, ((n0, fl0), (n1, fl1)) in m0.diff(m1).items():
2507 if n1:
2521 if n1:
2508 md[f] = n1
2522 md[f] = n1
2509 if fl1:
2523 if fl1:
2510 md.setflag(f, fl1)
2524 md.setflag(f, fl1)
2511 return md
2525 return md
2512
2526
2513 def read_any_fast_delta(
2527 def read_any_fast_delta(
2514 self,
2528 self,
2515 valid_bases: Optional[Collection[int]] = None,
2529 valid_bases: Optional[Collection[int]] = None,
2516 *,
2530 *,
2517 shallow: bool = False,
2531 shallow: bool = False,
2518 ) -> Tuple[Optional[int], AnyManifestDict]:
2532 ) -> Tuple[Optional[int], AnyManifestDict]:
2519 """see `imanifestrevisionstored` documentation"""
2533 """see `imanifestrevisionstored` documentation"""
2520 store = self._storage()
2534 store = self._storage()
2521 r = store.rev(self._node)
2535 r = store.rev(self._node)
2522 deltaparent = store.deltaparent(r)
2536 deltaparent = store.deltaparent(r)
2523
2537
2524 if valid_bases is None:
2538 if valid_bases is None:
2525 # make sure the next check is True
2539 # make sure the next check is True
2526 valid_bases = (deltaparent,)
2540 valid_bases = (deltaparent,)
2527 can_use_delta = deltaparent != nullrev and deltaparent in valid_bases
2541 can_use_delta = deltaparent != nullrev and deltaparent in valid_bases
2528
2542
2529 if shallow:
2543 if shallow:
2530 if can_use_delta:
2544 if can_use_delta:
2531 return (deltaparent, self._read_storage_delta_shallow())
2545 return (deltaparent, self._read_storage_delta_shallow())
2532 else:
2546 else:
2533 d = store.revision(self._node)
2547 d = store.revision(self._node)
2534 return (None, manifestdict(store.nodeconstants.nodelen, d))
2548 return (None, manifestdict(store.nodeconstants.nodelen, d))
2535 else:
2549 else:
2536 # note: This use of "slow_delta" here is cargo-culted from the previous
2550 # note: This use of "slow_delta" here is cargo-culted from the previous
2537 # implementation. I am not sure it makes sense since the goal here is to
2551 # implementation. I am not sure it makes sense since the goal here is to
2538 # be fast, so why are we computing a delta? On the other hand, tree
2552 # be fast, so why are we computing a delta? On the other hand, tree
2539 # manifest deltas are fairly "cheap" and allow for skipping whole parts of
2553 # manifest deltas are fairly "cheap" and allow for skipping whole parts of
2540 # the tree that a full read would access. So it might be a good idea.
2554 # the tree that a full read would access. So it might be a good idea.
2541 #
2555 #
2542 # If we realize we don't need delta here, we should simply use:
2556 # If we realize we don't need delta here, we should simply use:
2543 #
2557 #
2544 # return (None, self.read())
2558 # return (None, self.read())
2545 if can_use_delta:
2559 if can_use_delta:
2546 return (None, self._read_storage_slow_delta(base=deltaparent))
2560 return (None, self._read_storage_slow_delta(base=deltaparent))
2547 else:
2561 else:
2548 parents = [
2562 parents = [
2549 p
2563 p
2550 for p in store.parentrevs(r)
2564 for p in store.parentrevs(r)
2551 if p is not nullrev and p in valid_bases
2565 if p is not nullrev and p in valid_bases
2552 ]
2566 ]
2553 if parents:
2567 if parents:
2554 best_base = max(parents)
2568 best_base = max(parents)
2555 else:
2569 else:
2556 best_base = max(valid_bases)
2570 best_base = max(valid_bases)
2557 return (None, self._read_storage_slow_delta(base=best_base))
2571 return (None, self._read_storage_slow_delta(base=best_base))
2558
2572
2559 def _read_storage_delta_shallow(self) -> ManifestDict:
2573 def _read_storage_delta_shallow(self) -> ManifestDict:
2560 store = self._storage()
2574 store = self._storage()
2561 r = store.rev(self._node)
2575 r = store.rev(self._node)
2562 d = mdiff.patchtext(store.revdiff(store.deltaparent(r), r))
2576 d = mdiff.patchtext(store.revdiff(store.deltaparent(r), r))
2563 return manifestdict(store.nodeconstants.nodelen, d)
2577 return manifestdict(store.nodeconstants.nodelen, d)
2564
2578
2565 def _read_storage_slow_delta(self, base) -> 'TreeManifest':
2579 def _read_storage_slow_delta(self, base) -> 'TreeManifest':
2566 store = self._storage()
2580 store = self._storage()
2567 if base is None:
2581 if base is None:
2568 base = store.deltaparent(store.rev(self._node))
2582 base = store.deltaparent(store.rev(self._node))
2569 m0 = self._manifestlog.get(self._dir, store.node(base)).read()
2583 m0 = self._manifestlog.get(self._dir, store.node(base)).read()
2570 m1 = self.read()
2584 m1 = self.read()
2571 md = treemanifest(self._manifestlog.nodeconstants, dir=self._dir)
2585 md = treemanifest(self._manifestlog.nodeconstants, dir=self._dir)
2572 for f, ((n0, fl0), (n1, fl1)) in m0.diff(m1).items():
2586 for f, ((n0, fl0), (n1, fl1)) in m0.diff(m1).items():
2573 if n1:
2587 if n1:
2574 md[f] = n1
2588 md[f] = n1
2575 if fl1:
2589 if fl1:
2576 md.setflag(f, fl1)
2590 md.setflag(f, fl1)
2577 return md
2591 return md
2578
2592
2579 def read_delta_parents(
2593 def read_delta_parents(
2580 self,
2594 self,
2581 *,
2595 *,
2582 shallow: bool = False,
2596 shallow: bool = False,
2583 exact: bool = True,
2597 exact: bool = True,
2584 ) -> AnyManifestDict:
2598 ) -> AnyManifestDict:
2585 """see `interface.imanifestrevisionbase` documentations"""
2599 """see `interface.imanifestrevisionbase` documentations"""
2586 store = self._storage()
2600 store = self._storage()
2587 r = store.rev(self._node)
2601 r = store.rev(self._node)
2588 parents = [p for p in store.parentrevs(r) if p is not nullrev]
2602 parents = [p for p in store.parentrevs(r) if p is not nullrev]
2589 if not exact:
2603 if not exact:
2590 return self.read_any_fast_delta(parents, shallow=shallow)[1]
2604 return self.read_any_fast_delta(parents, shallow=shallow)[1]
2591 elif len(parents) == 0:
2605 elif len(parents) == 0:
2592 if shallow:
2606 if shallow:
2593 d = store.revision(self._node)
2607 d = store.revision(self._node)
2594 return manifestdict(store.nodeconstants.nodelen, d)
2608 return manifestdict(store.nodeconstants.nodelen, d)
2595 else:
2609 else:
2596 return self.read()
2610 return self.read()
2597 elif len(parents) == 1:
2611 elif len(parents) == 1:
2598 p = parents[0]
2612 p = parents[0]
2599 if shallow:
2613 if shallow:
2600 d = mdiff.patchtext(store.revdiff(p, r))
2614 d = mdiff.patchtext(store.revdiff(p, r))
2601 return manifestdict(store.nodeconstants.nodelen, d)
2615 return manifestdict(store.nodeconstants.nodelen, d)
2602 else:
2616 else:
2603 return self._read_storage_slow_delta(base=p)
2617 return self._read_storage_slow_delta(base=p)
2604 else:
2618 else:
2605 p1, p2 = parents
2619 p1, p2 = parents
2606 if shallow:
2620 if shallow:
2607 d1 = mdiff.patchtext(store.revdiff(p1, r))
2621 d1 = mdiff.patchtext(store.revdiff(p1, r))
2608 d2 = mdiff.patchtext(store.revdiff(p2, r))
2622 d2 = mdiff.patchtext(store.revdiff(p2, r))
2609 d1 = manifestdict(store.nodeconstants.nodelen, d1)
2623 d1 = manifestdict(store.nodeconstants.nodelen, d1)
2610 d2 = manifestdict(store.nodeconstants.nodelen, d2)
2624 d2 = manifestdict(store.nodeconstants.nodelen, d2)
2611 md = manifestdict(store.nodeconstants.nodelen)
2625 md = manifestdict(store.nodeconstants.nodelen)
2612 for f, new_node, new_flag in d1.iterentries():
2626 for f, new_node, new_flag in d1.iterentries():
2613 if f not in d2:
2627 if f not in d2:
2614 continue
2628 continue
2615 if new_node is not None:
2629 if new_node is not None:
2616 md.set(f, new_node, new_flag)
2630 md.set(f, new_node, new_flag)
2617 return md
2631 return md
2618 else:
2632 else:
2619 m1 = self._manifestlog.get(self._dir, store.node(p1)).read()
2633 m1 = self._manifestlog.get(self._dir, store.node(p1)).read()
2620 m2 = self._manifestlog.get(self._dir, store.node(p2)).read()
2634 m2 = self._manifestlog.get(self._dir, store.node(p2)).read()
2621 mc = self.read()
2635 mc = self.read()
2622 d1 = m1.diff(mc)
2636 d1 = m1.diff(mc)
2623 d2 = m2.diff(mc)
2637 d2 = m2.diff(mc)
2624 md = treemanifest(
2638 md = treemanifest(
2625 self._manifestlog.nodeconstants,
2639 self._manifestlog.nodeconstants,
2626 dir=self._dir,
2640 dir=self._dir,
2627 )
2641 )
2628 for f, new_node, new_flag in d1.iterentries():
2642 for f, new_node, new_flag in d1.iterentries():
2629 if f not in d2:
2643 if f not in d2:
2630 continue
2644 continue
2631 if new_node is not None:
2645 if new_node is not None:
2632 md.set(f, new_node, new_flag)
2646 md.set(f, new_node, new_flag)
2633 return md
2647 return md
2634
2648
2635 def read_delta_new_entries(
2649 def read_delta_new_entries(
2636 self, *, shallow: bool = False
2650 self, *, shallow: bool = False
2637 ) -> AnyManifestDict:
2651 ) -> AnyManifestDict:
2638 """see `interface.imanifestrevisionbase` documentations"""
2652 """see `interface.imanifestrevisionbase` documentations"""
2639 # If we are using narrow, returning a delta against an arbitrary
2653 # If we are using narrow, returning a delta against an arbitrary
2640 # changeset might return files outside the narrowspec. This can create
2654 # changeset might return files outside the narrowspec. This can create
2641 # issues when running validation server side with strict security, as a
2655 # issues when running validation server side with strict security, as a
2642 # push from a low-privilege user might be seen as adding new revisions
2656 # push from a low-privilege user might be seen as adding new revisions
2643 # for files they cannot touch. So we are strict if narrow is involved.
2657 # for files they cannot touch. So we are strict if narrow is involved.
2644 if self._manifestlog.narrowed:
2658 if self._manifestlog.narrowed:
2645 return self.read_delta_parents(shallow=shallow, exact=True)
2659 return self.read_delta_parents(shallow=shallow, exact=True)
2646 # delegate to another existing method for simplicity
2660 # delegate to another existing method for simplicity
2647 store = self._storage()
2661 store = self._storage()
2648 r = store.rev(self._node)
2662 r = store.rev(self._node)
2649 bases = (store.deltaparent(r),)
2663 bases = (store.deltaparent(r),)
2650 return self.read_any_fast_delta(bases, shallow=shallow)[1]
2664 return self.read_any_fast_delta(bases, shallow=shallow)[1]
2651
2665
2652 def readfast(self, shallow=False) -> AnyManifestDict:
2666 def readfast(self, shallow=False) -> AnyManifestDict:
2653 """Calls either readdelta or read, based on which would be less work.
2667 """Calls either readdelta or read, based on which would be less work.
2654 readdelta is called if the delta is against the p1, and therefore can be
2668 readdelta is called if the delta is against the p1, and therefore can be
2655 read quickly.
2669 read quickly.
2656
2670
2657 If `shallow` is True, it only returns the entries from this manifest,
2671 If `shallow` is True, it only returns the entries from this manifest,
2658 and not any submanifests.
2672 and not any submanifests.
2659 """
2673 """
2660 util.nouideprecwarn(
2674 util.nouideprecwarn(
2661 b'"readdelta" is deprecated use "read_any_fast_delta" or "read_delta_parents"',
2675 b'"readdelta" is deprecated use "read_any_fast_delta" or "read_delta_parents"',
2662 b"6.9",
2676 b"6.9",
2663 stacklevel=2,
2677 stacklevel=2,
2664 )
2678 )
2665 store = self._storage()
2679 store = self._storage()
2666 r = store.rev(self._node)
2680 r = store.rev(self._node)
2667 deltaparent = store.deltaparent(r)
2681 deltaparent = store.deltaparent(r)
2668 if deltaparent != nullrev and deltaparent in store.parentrevs(r):
2682 if deltaparent != nullrev and deltaparent in store.parentrevs(r):
2669 return self.readdelta(shallow=shallow)
2683 return self.readdelta(shallow=shallow)
2670
2684
2671 if shallow:
2685 if shallow:
2672 return manifestdict(
2686 return manifestdict(
2673 store.nodeconstants.nodelen, store.revision(self._node)
2687 store.nodeconstants.nodelen, store.revision(self._node)
2674 )
2688 )
2675 else:
2689 else:
2676 return self.read()
2690 return self.read()
2677
2691
2678 def find(self, key: bytes) -> Tuple[bytes, bytes]:
2692 def find(self, key: bytes) -> Tuple[bytes, bytes]:
2679 return self.read().find(key)
2693 return self.read().find(key)
2680
2694
2681
2695
2682 treemanifestctx = interfaceutil.implementer(repository.imanifestrevisionstored)(
2696 treemanifestctx = interfaceutil.implementer(repository.imanifestrevisionstored)(
2683 TreeManifestCtx
2697 TreeManifestCtx
2684 )
2698 )
2685
2699
2686 if typing.TYPE_CHECKING:
2700 if typing.TYPE_CHECKING:
2687 treemanifestctx = TreeManifestCtx
2701 treemanifestctx = TreeManifestCtx
2688
2702
2689
2703
2690 class excludeddir(treemanifest):
2704 class excludeddir(treemanifest):
2691 """Stand-in for a directory that is excluded from the repository.
2705 """Stand-in for a directory that is excluded from the repository.
2692
2706
2693 With narrowing active on a repository that uses treemanifests,
2707 With narrowing active on a repository that uses treemanifests,
2694 some of the directory revlogs will be excluded from the resulting
2708 some of the directory revlogs will be excluded from the resulting
2695 clone. This is a huge storage win for clients, but means we need
2709 clone. This is a huge storage win for clients, but means we need
2696 some sort of pseudo-manifest to surface to internals so we can
2710 some sort of pseudo-manifest to surface to internals so we can
2697 detect a merge conflict outside the narrowspec. That's what this
2711 detect a merge conflict outside the narrowspec. That's what this
2698 class is: it stands in for a directory whose node is known, but
2712 class is: it stands in for a directory whose node is known, but
2699 whose contents are unknown.
2713 whose contents are unknown.
2700 """
2714 """
2701
2715
2716 _files: Dict[bytes, bytes]
2717 _flags: Dict[bytes, bytes]
2718
2702 def __init__(self, nodeconstants, dir, node):
2719 def __init__(self, nodeconstants, dir, node):
2703 super(excludeddir, self).__init__(nodeconstants, dir)
2720 super(excludeddir, self).__init__(nodeconstants, dir)
2704 self._node = node
2721 self._node = node
2705 # Add an empty file, which will be included by iterators and such,
2722 # Add an empty file, which will be included by iterators and such,
2706 # appearing as the directory itself (i.e. something like "dir/")
2723 # appearing as the directory itself (i.e. something like "dir/")
2707 self._files[b''] = node
2724 self._files[b''] = node
2708 self._flags[b''] = b't'
2725 self._flags[b''] = b't'
2709
2726
2710 # Manifests outside the narrowspec should never be modified, so avoid
2727 # Manifests outside the narrowspec should never be modified, so avoid
2711 # copying. This makes a noticeable difference when there are very many
2728 # copying. This makes a noticeable difference when there are very many
2712 # directories outside the narrowspec. Also, it makes sense for the copy to
2729 # directories outside the narrowspec. Also, it makes sense for the copy to
2713 # be of the same type as the original, which would not happen with the
2730 # be of the same type as the original, which would not happen with the
2714 # super type's copy().
2731 # super type's copy().
2715 def copy(self):
2732 def copy(self):
2716 return self
2733 return self
2717
2734
2718
2735
2719 class excludeddirmanifestctx(treemanifestctx):
2736 class excludeddirmanifestctx(treemanifestctx):
2720 """context wrapper for excludeddir - see that docstring for rationale"""
2737 """context wrapper for excludeddir - see that docstring for rationale"""
2721
2738
2722 def __init__(self, nodeconstants, dir, node):
2739 def __init__(self, nodeconstants, dir, node):
2723 self.nodeconstants = nodeconstants
2740 self.nodeconstants = nodeconstants
2724 self._dir = dir
2741 self._dir = dir
2725 self._node = node
2742 self._node = node
2726
2743
2727 def read(self):
2744 def read(self):
2728 return excludeddir(self.nodeconstants, self._dir, self._node)
2745 return excludeddir(self.nodeconstants, self._dir, self._node)
2729
2746
2730 def readfast(self, shallow=False):
2747 def readfast(self, shallow=False):
2731 # special version of readfast since we don't have underlying storage
2748 # special version of readfast since we don't have underlying storage
2732 return self.read()
2749 return self.read()
2733
2750
2734 def write(self, *args):
2751 def write(self, *args):
2735 raise error.ProgrammingError(
2752 raise error.ProgrammingError(
2736 b'attempt to write manifest from excluded dir %s' % self._dir
2753 b'attempt to write manifest from excluded dir %s' % self._dir
2737 )
2754 )
2738
2755
2739
2756
2740 class excludedmanifestrevlog(manifestrevlog):
2757 class excludedmanifestrevlog(manifestrevlog):
2741 """Stand-in for excluded treemanifest revlogs.
2758 """Stand-in for excluded treemanifest revlogs.
2742
2759
2743 When narrowing is active on a treemanifest repository, we'll have
2760 When narrowing is active on a treemanifest repository, we'll have
2744 references to directories we can't see due to the revlog being
2761 references to directories we can't see due to the revlog being
2745 skipped. This class exists to conform to the manifestrevlog
2762 skipped. This class exists to conform to the manifestrevlog
2746 interface for those directories and proactively prevent writes to
2763 interface for those directories and proactively prevent writes to
2747 outside the narrowspec.
2764 outside the narrowspec.
2748 """
2765 """
2749
2766
2750 def __init__(self, nodeconstants, dir):
2767 def __init__(self, nodeconstants, dir):
2751 self.nodeconstants = nodeconstants
2768 self.nodeconstants = nodeconstants
2752 self._dir = dir
2769 self._dir = dir
2753
2770
2754 def __len__(self):
2771 def __len__(self):
2755 raise error.ProgrammingError(
2772 raise error.ProgrammingError(
2756 b'attempt to get length of excluded dir %s' % self._dir
2773 b'attempt to get length of excluded dir %s' % self._dir
2757 )
2774 )
2758
2775
2759 def rev(self, node):
2776 def rev(self, node):
2760 raise error.ProgrammingError(
2777 raise error.ProgrammingError(
2761 b'attempt to get rev from excluded dir %s' % self._dir
2778 b'attempt to get rev from excluded dir %s' % self._dir
2762 )
2779 )
2763
2780
2764 def linkrev(self, node):
2781 def linkrev(self, node):
2765 raise error.ProgrammingError(
2782 raise error.ProgrammingError(
2766 b'attempt to get linkrev from excluded dir %s' % self._dir
2783 b'attempt to get linkrev from excluded dir %s' % self._dir
2767 )
2784 )
2768
2785
2769 def node(self, rev):
2786 def node(self, rev):
2770 raise error.ProgrammingError(
2787 raise error.ProgrammingError(
2771 b'attempt to get node from excluded dir %s' % self._dir
2788 b'attempt to get node from excluded dir %s' % self._dir
2772 )
2789 )
2773
2790
2774 def add(self, *args, **kwargs):
2791 def add(self, *args, **kwargs):
2775 # We should never write entries in dirlogs outside the narrow clone.
2792 # We should never write entries in dirlogs outside the narrow clone.
2776 # However, the method still gets called from writesubtree() in
2793 # However, the method still gets called from writesubtree() in
2777 # _addtree(), so we need to handle it. We should possibly make it
2794 # _addtree(), so we need to handle it. We should possibly make it
2778 # avoid calling add() with a clean manifest (_dirty is always False
2795 # avoid calling add() with a clean manifest (_dirty is always False
2779 # in excludeddir instances).
2796 # in excludeddir instances).
2780 pass
2797 pass
@@ -1,4123 +1,4125 b''
1 # revlog.py - storage back-end for mercurial
1 # revlog.py - storage back-end for mercurial
2 # coding: utf8
2 # coding: utf8
3 #
3 #
4 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
4 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
5 #
5 #
6 # This software may be used and distributed according to the terms of the
6 # This software may be used and distributed according to the terms of the
7 # GNU General Public License version 2 or any later version.
7 # GNU General Public License version 2 or any later version.
8
8
9 """Storage back-end for Mercurial.
9 """Storage back-end for Mercurial.
10
10
11 This provides efficient delta storage with O(1) retrieve and append
11 This provides efficient delta storage with O(1) retrieve and append
12 and O(changes) merge between branches.
12 and O(changes) merge between branches.
13 """
13 """
14
14
15
15
16 import binascii
16 import binascii
17 import collections
17 import collections
18 import contextlib
18 import contextlib
19 import functools
19 import functools
20 import io
20 import io
21 import os
21 import os
22 import struct
22 import struct
23 import typing
23 import typing
24 import weakref
24 import weakref
25 import zlib
25 import zlib
26
26
27 from typing import (
27 from typing import (
28 Iterable,
29 Iterator,
28 Optional,
30 Optional,
29 Tuple,
31 Tuple,
30 )
32 )
31
33
32 # import stuff from node for others to import from revlog
34 # import stuff from node for others to import from revlog
33 from .node import (
35 from .node import (
34 bin,
36 bin,
35 hex,
37 hex,
36 nullrev,
38 nullrev,
37 sha1nodeconstants,
39 sha1nodeconstants,
38 short,
40 short,
39 wdirrev,
41 wdirrev,
40 )
42 )
41 from .i18n import _
43 from .i18n import _
42 from .revlogutils.constants import (
44 from .revlogutils.constants import (
43 ALL_KINDS,
45 ALL_KINDS,
44 CHANGELOGV2,
46 CHANGELOGV2,
45 COMP_MODE_DEFAULT,
47 COMP_MODE_DEFAULT,
46 COMP_MODE_INLINE,
48 COMP_MODE_INLINE,
47 COMP_MODE_PLAIN,
49 COMP_MODE_PLAIN,
48 DELTA_BASE_REUSE_NO,
50 DELTA_BASE_REUSE_NO,
49 DELTA_BASE_REUSE_TRY,
51 DELTA_BASE_REUSE_TRY,
50 ENTRY_RANK,
52 ENTRY_RANK,
51 FEATURES_BY_VERSION,
53 FEATURES_BY_VERSION,
52 FLAG_GENERALDELTA,
54 FLAG_GENERALDELTA,
53 FLAG_INLINE_DATA,
55 FLAG_INLINE_DATA,
54 INDEX_HEADER,
56 INDEX_HEADER,
55 KIND_CHANGELOG,
57 KIND_CHANGELOG,
56 KIND_FILELOG,
58 KIND_FILELOG,
57 RANK_UNKNOWN,
59 RANK_UNKNOWN,
58 REVLOGV0,
60 REVLOGV0,
59 REVLOGV1,
61 REVLOGV1,
60 REVLOGV1_FLAGS,
62 REVLOGV1_FLAGS,
61 REVLOGV2,
63 REVLOGV2,
62 REVLOGV2_FLAGS,
64 REVLOGV2_FLAGS,
63 REVLOG_DEFAULT_FLAGS,
65 REVLOG_DEFAULT_FLAGS,
64 REVLOG_DEFAULT_FORMAT,
66 REVLOG_DEFAULT_FORMAT,
65 REVLOG_DEFAULT_VERSION,
67 REVLOG_DEFAULT_VERSION,
66 SUPPORTED_FLAGS,
68 SUPPORTED_FLAGS,
67 )
69 )
68 from .revlogutils.flagutil import (
70 from .revlogutils.flagutil import (
69 REVIDX_DEFAULT_FLAGS,
71 REVIDX_DEFAULT_FLAGS,
70 REVIDX_ELLIPSIS,
72 REVIDX_ELLIPSIS,
71 REVIDX_EXTSTORED,
73 REVIDX_EXTSTORED,
72 REVIDX_FLAGS_ORDER,
74 REVIDX_FLAGS_ORDER,
73 REVIDX_HASCOPIESINFO,
75 REVIDX_HASCOPIESINFO,
74 REVIDX_ISCENSORED,
76 REVIDX_ISCENSORED,
75 REVIDX_RAWTEXT_CHANGING_FLAGS,
77 REVIDX_RAWTEXT_CHANGING_FLAGS,
76 )
78 )
77 from .thirdparty import attr
79 from .thirdparty import attr
78
80
79 # Force pytype to use the non-vendored package
81 # Force pytype to use the non-vendored package
80 if typing.TYPE_CHECKING:
82 if typing.TYPE_CHECKING:
81 # noinspection PyPackageRequirements
83 # noinspection PyPackageRequirements
82 import attr
84 import attr
83
85
84 from . import (
86 from . import (
85 ancestor,
87 ancestor,
86 dagop,
88 dagop,
87 error,
89 error,
88 mdiff,
90 mdiff,
89 policy,
91 policy,
90 pycompat,
92 pycompat,
91 revlogutils,
93 revlogutils,
92 templatefilters,
94 templatefilters,
93 util,
95 util,
94 vfs as vfsmod,
96 vfs as vfsmod,
95 )
97 )
96 from .interfaces import (
98 from .interfaces import (
97 repository,
99 repository,
98 util as interfaceutil,
100 util as interfaceutil,
99 )
101 )
100 from .revlogutils import (
102 from .revlogutils import (
101 deltas as deltautil,
103 deltas as deltautil,
102 docket as docketutil,
104 docket as docketutil,
103 flagutil,
105 flagutil,
104 nodemap as nodemaputil,
106 nodemap as nodemaputil,
105 randomaccessfile,
107 randomaccessfile,
106 revlogv0,
108 revlogv0,
107 rewrite,
109 rewrite,
108 sidedata as sidedatautil,
110 sidedata as sidedatautil,
109 )
111 )
110 from .utils import (
112 from .utils import (
111 storageutil,
113 storageutil,
112 stringutil,
114 stringutil,
113 )
115 )
114
116
115 # blanket usage of all the names to prevent pyflakes complaints
117 # blanket usage of all the names to prevent pyflakes complaints
116 # We need these names available in the module for extensions.
118 # We need these names available in the module for extensions.
117
119
118 REVLOGV0
120 REVLOGV0
119 REVLOGV1
121 REVLOGV1
120 REVLOGV2
122 REVLOGV2
121 CHANGELOGV2
123 CHANGELOGV2
122 FLAG_INLINE_DATA
124 FLAG_INLINE_DATA
123 FLAG_GENERALDELTA
125 FLAG_GENERALDELTA
124 REVLOG_DEFAULT_FLAGS
126 REVLOG_DEFAULT_FLAGS
125 REVLOG_DEFAULT_FORMAT
127 REVLOG_DEFAULT_FORMAT
126 REVLOG_DEFAULT_VERSION
128 REVLOG_DEFAULT_VERSION
127 REVLOGV1_FLAGS
129 REVLOGV1_FLAGS
128 REVLOGV2_FLAGS
130 REVLOGV2_FLAGS
129 REVIDX_ISCENSORED
131 REVIDX_ISCENSORED
130 REVIDX_ELLIPSIS
132 REVIDX_ELLIPSIS
131 REVIDX_HASCOPIESINFO
133 REVIDX_HASCOPIESINFO
132 REVIDX_EXTSTORED
134 REVIDX_EXTSTORED
133 REVIDX_DEFAULT_FLAGS
135 REVIDX_DEFAULT_FLAGS
134 REVIDX_FLAGS_ORDER
136 REVIDX_FLAGS_ORDER
135 REVIDX_RAWTEXT_CHANGING_FLAGS
137 REVIDX_RAWTEXT_CHANGING_FLAGS
136
138
137 parsers = policy.importmod('parsers')
139 parsers = policy.importmod('parsers')
138 rustancestor = policy.importrust('ancestor')
140 rustancestor = policy.importrust('ancestor')
139 rustdagop = policy.importrust('dagop')
141 rustdagop = policy.importrust('dagop')
140 rustrevlog = policy.importrust('revlog')
142 rustrevlog = policy.importrust('revlog')
141
143
142 # Aliased for performance.
144 # Aliased for performance.
143 _zlibdecompress = zlib.decompress
145 _zlibdecompress = zlib.decompress
144
146
145 # max size of inline data embedded into a revlog
147 # max size of inline data embedded into a revlog
146 _maxinline = 131072
148 _maxinline = 131072
147
149
148
150
149 # Flag processors for REVIDX_ELLIPSIS.
151 # Flag processors for REVIDX_ELLIPSIS.
150 def ellipsisreadprocessor(rl, text):
152 def ellipsisreadprocessor(rl, text):
151 return text, False
153 return text, False
152
154
153
155
154 def ellipsiswriteprocessor(rl, text):
156 def ellipsiswriteprocessor(rl, text):
155 return text, False
157 return text, False
156
158
157
159
158 def ellipsisrawprocessor(rl, text):
160 def ellipsisrawprocessor(rl, text):
159 return False
161 return False
160
162
161
163
162 ellipsisprocessor = (
164 ellipsisprocessor = (
163 ellipsisreadprocessor,
165 ellipsisreadprocessor,
164 ellipsiswriteprocessor,
166 ellipsiswriteprocessor,
165 ellipsisrawprocessor,
167 ellipsisrawprocessor,
166 )
168 )
167
169
168
170
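The three functions above are bundled in flagutil's (read, write, raw) processor order: read and write return a (text, validatehash) pair, and raw reports whether the raw text can be verified as-is. A self-contained toy processor with a non-identity transform makes the contract visible (the ellipsis processors themselves are deliberately no-ops):

    def toy_read(rl, text):
        return text.upper(), False   # transform applied when reading

    def toy_write(rl, text):
        return text.lower(), False   # inverse transform applied when writing

    def toy_raw(rl, text):
        return False                 # rawtext is not identical to rendered text

    toyprocessor = (toy_read, toy_write, toy_raw)

    readfn, writefn, rawfn = toyprocessor
    stored = writefn(None, b'ABC')[0]         # what would be stored: b'abc'
    assert readfn(None, stored)[0] == b'ABC'  # what a reader sees again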
169 def _verify_revision(rl, skipflags, state, node):
171 def _verify_revision(rl, skipflags, state, node):
170 """Verify the integrity of the given revlog ``node`` while providing a hook
172 """Verify the integrity of the given revlog ``node`` while providing a hook
171 point for extensions to influence the operation."""
173 point for extensions to influence the operation."""
172 if skipflags:
174 if skipflags:
173 state[b'skipread'].add(node)
175 state[b'skipread'].add(node)
174 else:
176 else:
175 # Side-effect: read content and verify hash.
177 # Side-effect: read content and verify hash.
176 rl.revision(node)
178 rl.revision(node)
177
179
178
180
179 # True if a fast implementation for persistent-nodemap is available
181 # True if a fast implementation for persistent-nodemap is available
180 #
182 #
181 # We also consider the "pure" python implementation "fast" because
183 # We also consider the "pure" python implementation "fast" because
182 # people using pure don't really have performance considerations (and a
184 # people using pure don't really have performance considerations (and a
183 # wheelbarrow of other slowness sources)
185 # wheelbarrow of other slowness sources)
184 HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or hasattr(
186 HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or hasattr(
185 parsers, 'BaseIndexObject'
187 parsers, 'BaseIndexObject'
186 )
188 )
187
189
188
190
189 @attr.s(slots=True)
191 @attr.s(slots=True)
190 class RevLogRevisionDelta:
192 class RevLogRevisionDelta:
191 node = attr.ib()
193 node = attr.ib()
192 p1node = attr.ib()
194 p1node = attr.ib()
193 p2node = attr.ib()
195 p2node = attr.ib()
194 basenode = attr.ib()
196 basenode = attr.ib()
195 flags = attr.ib()
197 flags = attr.ib()
196 baserevisionsize = attr.ib()
198 baserevisionsize = attr.ib()
197 revision = attr.ib()
199 revision = attr.ib()
198 delta = attr.ib()
200 delta = attr.ib()
199 sidedata = attr.ib()
201 sidedata = attr.ib()
200 protocol_flags = attr.ib()
202 protocol_flags = attr.ib()
201 linknode = attr.ib(default=None)
203 linknode = attr.ib(default=None)
202
204
203
205
204 revlogrevisiondelta = interfaceutil.implementer(repository.irevisiondelta)(
206 revlogrevisiondelta = interfaceutil.implementer(repository.irevisiondelta)(
205 RevLogRevisionDelta
207 RevLogRevisionDelta
206 )
208 )
207
209
208 if typing.TYPE_CHECKING:
210 if typing.TYPE_CHECKING:
209 revlogrevisiondelta = RevLogRevisionDelta
211 revlogrevisiondelta = RevLogRevisionDelta
210
212
211
213
212 @attr.s(frozen=True)
214 @attr.s(frozen=True)
213 class RevLogProblem:
215 class RevLogProblem:
214 warning = attr.ib(default=None, type=Optional[bytes])
216 warning = attr.ib(default=None, type=Optional[bytes])
215 error = attr.ib(default=None, type=Optional[bytes])
217 error = attr.ib(default=None, type=Optional[bytes])
216 node = attr.ib(default=None, type=Optional[bytes])
218 node = attr.ib(default=None, type=Optional[bytes])
217
219
218
220
219 revlogproblem = interfaceutil.implementer(repository.iverifyproblem)(
221 revlogproblem = interfaceutil.implementer(repository.iverifyproblem)(
220 RevLogProblem
222 RevLogProblem
221 )
223 )
222
224
223 if typing.TYPE_CHECKING:
225 if typing.TYPE_CHECKING:
224 revlogproblem = RevLogProblem
226 revlogproblem = RevLogProblem
225
227
226
228
227 def parse_index_v1(data, inline):
229 def parse_index_v1(data, inline):
228 # call the C implementation to parse the index data
230 # call the C implementation to parse the index data
229 index, cache = parsers.parse_index2(data, inline)
231 index, cache = parsers.parse_index2(data, inline)
230 return index, cache
232 return index, cache
231
233
232
234
233 def parse_index_v2(data, inline):
235 def parse_index_v2(data, inline):
234 # call the C implementation to parse the index data
236 # call the C implementation to parse the index data
235 index, cache = parsers.parse_index2(data, inline, format=REVLOGV2)
237 index, cache = parsers.parse_index2(data, inline, format=REVLOGV2)
236 return index, cache
238 return index, cache
237
239
238
240
239 def parse_index_cl_v2(data, inline):
241 def parse_index_cl_v2(data, inline):
240 # call the C implementation to parse the index data
242 # call the C implementation to parse the index data
241 index, cache = parsers.parse_index2(data, inline, format=CHANGELOGV2)
243 index, cache = parsers.parse_index2(data, inline, format=CHANGELOGV2)
242 return index, cache
244 return index, cache
243
245
244
246
245 if hasattr(parsers, 'parse_index_devel_nodemap'):
247 if hasattr(parsers, 'parse_index_devel_nodemap'):
246
248
247 def parse_index_v1_nodemap(data, inline):
249 def parse_index_v1_nodemap(data, inline):
248 index, cache = parsers.parse_index_devel_nodemap(data, inline)
250 index, cache = parsers.parse_index_devel_nodemap(data, inline)
249 return index, cache
251 return index, cache
250
252
251 else:
253 else:
252 parse_index_v1_nodemap = None
254 parse_index_v1_nodemap = None
253
255
254
256
255 def parse_index_v1_rust(data, inline, default_header):
257 def parse_index_v1_rust(data, inline, default_header):
256 cache = (0, data) if inline else None
258 cache = (0, data) if inline else None
257 return rustrevlog.Index(data, default_header), cache
259 return rustrevlog.Index(data, default_header), cache
258
260
259
261
260 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
262 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
261 # signed integer)
263 # signed integer)
262 _maxentrysize = 0x7FFFFFFF
264 _maxentrysize = 0x7FFFFFFF
263
265
264 FILE_TOO_SHORT_MSG = _(
266 FILE_TOO_SHORT_MSG = _(
265 b'cannot read from revlog %s;'
267 b'cannot read from revlog %s;'
266 b' expected %d bytes from offset %d, data size is %d'
268 b' expected %d bytes from offset %d, data size is %d'
267 )
269 )
268
270
269 hexdigits = b'0123456789abcdefABCDEF'
271 hexdigits = b'0123456789abcdefABCDEF'
270
272
271
273
272 class _Config:
274 class _Config:
273 def copy(self):
275 def copy(self):
274 return self.__class__(**self.__dict__)
276 return self.__class__(**self.__dict__)
275
277
276
278
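The keyword-based copy above works because these config classes are attrs classes without slots, so __dict__ maps field names straight onto __init__ keywords. A small illustration, including the shallow-copy caveat that FeatureConfig.copy below compensates for:

    import attr

    @attr.s()
    class Example:
        level = attr.ib(default=1)
        options = attr.ib(default=attr.Factory(dict))

        def copy(self):
            # field names map 1:1 onto __init__ keywords for non-slots attrs
            return self.__class__(**self.__dict__)

    e1 = Example(level=2)
    e2 = e1.copy()
    assert e2.level == 2 and e2 is not e1
    assert e2.options is e1.options  # shallow: mutable fields are shared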
277 @attr.s()
279 @attr.s()
278 class FeatureConfig(_Config):
280 class FeatureConfig(_Config):
279 """Hold configuration values about the available revlog features"""
281 """Hold configuration values about the available revlog features"""
280
282
281 # the default compression engine
283 # the default compression engine
282 compression_engine = attr.ib(default=b'zlib')
284 compression_engine = attr.ib(default=b'zlib')
283 # compression engine options
285 # compression engine options
284 compression_engine_options = attr.ib(default=attr.Factory(dict))
286 compression_engine_options = attr.ib(default=attr.Factory(dict))
285
287
286 # can we use censor on this revlog
288 # can we use censor on this revlog
287 censorable = attr.ib(default=False)
289 censorable = attr.ib(default=False)
288 # does this revlog use the "side data" feature
290 # does this revlog use the "side data" feature
289 has_side_data = attr.ib(default=False)
291 has_side_data = attr.ib(default=False)
290 # might remove rank configuration once the computation has no impact
292 # might remove rank configuration once the computation has no impact
291 compute_rank = attr.ib(default=False)
293 compute_rank = attr.ib(default=False)
292 # parent order is supposed to be semantically irrelevant, so we
294 # parent order is supposed to be semantically irrelevant, so we
293 # normally resort parents to ensure that the first parent is non-null,
295 # normally resort parents to ensure that the first parent is non-null,
294 # if there is a non-null parent at all.
296 # if there is a non-null parent at all.
295 # filelog abuses the parent order as flag to mark some instances of
297 # filelog abuses the parent order as flag to mark some instances of
296 # meta-encoded files, so allow it to disable this behavior.
298 # meta-encoded files, so allow it to disable this behavior.
297 canonical_parent_order = attr.ib(default=False)
299 canonical_parent_order = attr.ib(default=False)
298 # can ellipsis commit be used
300 # can ellipsis commit be used
299 enable_ellipsis = attr.ib(default=False)
301 enable_ellipsis = attr.ib(default=False)
300
302
301 def copy(self):
303 def copy(self):
302 new = super().copy()
304 new = super().copy()
303 new.compression_engine_options = self.compression_engine_options.copy()
305 new.compression_engine_options = self.compression_engine_options.copy()
304 return new
306 return new
305
307
306
308
307 @attr.s()
309 @attr.s()
308 class DataConfig(_Config):
310 class DataConfig(_Config):
309 """Hold configuration value about how the revlog data are read"""
311 """Hold configuration value about how the revlog data are read"""
310
312
311 # should we try to open the "pending" version of the revlog
313 # should we try to open the "pending" version of the revlog
312 try_pending = attr.ib(default=False)
314 try_pending = attr.ib(default=False)
313 # should we try to open the "splitted" version of the revlog
315 # should we try to open the "splitted" version of the revlog
314 try_split = attr.ib(default=False)
316 try_split = attr.ib(default=False)
315 # When True, indexfile should be opened with checkambig=True at writing,
317 # When True, indexfile should be opened with checkambig=True at writing,
316 # to avoid file stat ambiguity.
318 # to avoid file stat ambiguity.
317 check_ambig = attr.ib(default=False)
319 check_ambig = attr.ib(default=False)
318
320
319 # If true, use mmap instead of reading to deal with a large index
321 # If true, use mmap instead of reading to deal with a large index
320 mmap_large_index = attr.ib(default=False)
322 mmap_large_index = attr.ib(default=False)
321 # how much index data counts as "large"
323 # how much index data counts as "large"
322 mmap_index_threshold = attr.ib(default=None)
324 mmap_index_threshold = attr.ib(default=None)
323 # How much data to read and cache into the raw revlog data cache.
325 # How much data to read and cache into the raw revlog data cache.
324 chunk_cache_size = attr.ib(default=65536)
326 chunk_cache_size = attr.ib(default=65536)
325
327
326 # The size of the uncompressed cache compared to the largest revision seen.
328 # The size of the uncompressed cache compared to the largest revision seen.
327 uncompressed_cache_factor = attr.ib(default=None)
329 uncompressed_cache_factor = attr.ib(default=None)
328
330
329 # The number of chunks cached
331 # The number of chunks cached
330 uncompressed_cache_count = attr.ib(default=None)
332 uncompressed_cache_count = attr.ib(default=None)
331
333
332 # Allow sparse reading of the revlog data
334 # Allow sparse reading of the revlog data
333 with_sparse_read = attr.ib(default=False)
335 with_sparse_read = attr.ib(default=False)
334 # minimal density of a sparse read chunk
336 # minimal density of a sparse read chunk
335 sr_density_threshold = attr.ib(default=0.50)
337 sr_density_threshold = attr.ib(default=0.50)
336 # minimal size of data we skip when performing sparse read
338 # minimal size of data we skip when performing sparse read
337 sr_min_gap_size = attr.ib(default=262144)
339 sr_min_gap_size = attr.ib(default=262144)
338
340
339 # are deltas encoded against arbitrary bases.
341 # are deltas encoded against arbitrary bases.
340 generaldelta = attr.ib(default=False)
342 generaldelta = attr.ib(default=False)
341
343
342
344
343 @attr.s()
345 @attr.s()
344 class DeltaConfig(_Config):
346 class DeltaConfig(_Config):
345 """Hold configuration value about how new delta are computed
347 """Hold configuration value about how new delta are computed
346
348
347 Some attributes are duplicated from DataConfig to help havign each object
349 Some attributes are duplicated from DataConfig to help havign each object
348 self contained.
350 self contained.
349 """
351 """
350
352
351 # can deltas be encoded against arbitrary bases.
353 # can deltas be encoded against arbitrary bases.
352 general_delta = attr.ib(default=False)
354 general_delta = attr.ib(default=False)
353 # Allow sparse writing of the revlog data
355 # Allow sparse writing of the revlog data
354 sparse_revlog = attr.ib(default=False)
356 sparse_revlog = attr.ib(default=False)
355 # maximum length of a delta chain
357 # maximum length of a delta chain
356 max_chain_len = attr.ib(default=None)
358 max_chain_len = attr.ib(default=None)
357 # Maximum distance between delta chain base start and end
359 # Maximum distance between delta chain base start and end
358 max_deltachain_span = attr.ib(default=-1)
360 max_deltachain_span = attr.ib(default=-1)
359 # If `upper_bound_comp` is not None, this is the expected maximal gain from
361 # If `upper_bound_comp` is not None, this is the expected maximal gain from
360 # compression for the data content.
362 # compression for the data content.
361 upper_bound_comp = attr.ib(default=None)
363 upper_bound_comp = attr.ib(default=None)
362 # Should we try a delta against both parents
364 # Should we try a delta against both parents
363 delta_both_parents = attr.ib(default=True)
365 delta_both_parents = attr.ib(default=True)
364 # Test delta base candidate groups in chunks of this maximal size.
366 # Test delta base candidate groups in chunks of this maximal size.
365 candidate_group_chunk_size = attr.ib(default=0)
367 candidate_group_chunk_size = attr.ib(default=0)
366 # Should we display debug information about delta computation
368 # Should we display debug information about delta computation
367 debug_delta = attr.ib(default=False)
369 debug_delta = attr.ib(default=False)
368 # trust incoming delta by default
370 # trust incoming delta by default
369 lazy_delta = attr.ib(default=True)
371 lazy_delta = attr.ib(default=True)
370 # trust the base of incoming delta by default
372 # trust the base of incoming delta by default
371 lazy_delta_base = attr.ib(default=False)
373 lazy_delta_base = attr.ib(default=False)
372
374
373
375
374 class _InnerRevlog:
376 class _InnerRevlog:
375 """An inner layer of the revlog object
377 """An inner layer of the revlog object
376
378
377 That layer exist to be able to delegate some operation to Rust, its
379 That layer exist to be able to delegate some operation to Rust, its
378 boundaries are arbitrary and based on what we can delegate to Rust.
380 boundaries are arbitrary and based on what we can delegate to Rust.
379 """
381 """
380
382
381 opener: vfsmod.vfs
383 opener: vfsmod.vfs
382
384
383 def __init__(
385 def __init__(
384 self,
386 self,
385 opener: vfsmod.vfs,
387 opener: vfsmod.vfs,
386 index,
388 index,
387 index_file,
389 index_file,
388 data_file,
390 data_file,
389 sidedata_file,
391 sidedata_file,
390 inline,
392 inline,
391 data_config,
393 data_config,
392 delta_config,
394 delta_config,
393 feature_config,
395 feature_config,
394 chunk_cache,
396 chunk_cache,
395 default_compression_header,
397 default_compression_header,
396 ):
398 ):
397 self.opener = opener
399 self.opener = opener
398 self.index = index
400 self.index = index
399
401
400 self.index_file = index_file
402 self.index_file = index_file
401 self.data_file = data_file
403 self.data_file = data_file
402 self.sidedata_file = sidedata_file
404 self.sidedata_file = sidedata_file
403 self.inline = inline
405 self.inline = inline
404 self.data_config = data_config
406 self.data_config = data_config
405 self.delta_config = delta_config
407 self.delta_config = delta_config
406 self.feature_config = feature_config
408 self.feature_config = feature_config
407
409
408 # used during diverted write.
410 # used during diverted write.
409 self._orig_index_file = None
411 self._orig_index_file = None
410
412
411 self._default_compression_header = default_compression_header
413 self._default_compression_header = default_compression_header
412
414
413 # index
415 # index
414
416
415 # 3-tuple of file handles being used for active writing.
417 # 3-tuple of file handles being used for active writing.
416 self._writinghandles = None
418 self._writinghandles = None
417
419
418 self._segmentfile = randomaccessfile.randomaccessfile(
420 self._segmentfile = randomaccessfile.randomaccessfile(
419 self.opener,
421 self.opener,
420 (self.index_file if self.inline else self.data_file),
422 (self.index_file if self.inline else self.data_file),
421 self.data_config.chunk_cache_size,
423 self.data_config.chunk_cache_size,
422 chunk_cache,
424 chunk_cache,
423 )
425 )
424 self._segmentfile_sidedata = randomaccessfile.randomaccessfile(
426 self._segmentfile_sidedata = randomaccessfile.randomaccessfile(
425 self.opener,
427 self.opener,
426 self.sidedata_file,
428 self.sidedata_file,
427 self.data_config.chunk_cache_size,
429 self.data_config.chunk_cache_size,
428 )
430 )
429
431
430 # revlog header -> revlog compressor
432 # revlog header -> revlog compressor
431 self._decompressors = {}
433 self._decompressors = {}
432 # 3-tuple of (node, rev, text) for a raw revision.
434 # 3-tuple of (node, rev, text) for a raw revision.
433 self._revisioncache = None
435 self._revisioncache = None
434
436
435 # cache some uncompressed chunks
437 # cache some uncompressed chunks
436 # rev β†’ uncompressed_chunk
438 # rev β†’ uncompressed_chunk
437 #
439 #
438 # the max cost is dynamically updated to be proportional to the
440 # the max cost is dynamically updated to be proportional to the
439 # size of the revisions we actually encounter.
441 # size of the revisions we actually encounter.
440 self._uncompressed_chunk_cache = None
442 self._uncompressed_chunk_cache = None
441 if self.data_config.uncompressed_cache_factor is not None:
443 if self.data_config.uncompressed_cache_factor is not None:
442 self._uncompressed_chunk_cache = util.lrucachedict(
444 self._uncompressed_chunk_cache = util.lrucachedict(
443 self.data_config.uncompressed_cache_count,
445 self.data_config.uncompressed_cache_count,
444 maxcost=65536, # some arbitrary initial value
446 maxcost=65536, # some arbitrary initial value
445 )
447 )
446
448
447 self._delay_buffer = None
449 self._delay_buffer = None
448
450
449 def __len__(self):
451 def __len__(self):
450 return len(self.index)
452 return len(self.index)
451
453
452 def clear_cache(self):
454 def clear_cache(self):
453 assert not self.is_delaying
455 assert not self.is_delaying
454 self._revisioncache = None
456 self._revisioncache = None
455 if self._uncompressed_chunk_cache is not None:
457 if self._uncompressed_chunk_cache is not None:
456 self._uncompressed_chunk_cache.clear()
458 self._uncompressed_chunk_cache.clear()
457 self._segmentfile.clear_cache()
459 self._segmentfile.clear_cache()
458 self._segmentfile_sidedata.clear_cache()
460 self._segmentfile_sidedata.clear_cache()
459
461
460 @property
462 @property
461 def canonical_index_file(self):
463 def canonical_index_file(self):
462 if self._orig_index_file is not None:
464 if self._orig_index_file is not None:
463 return self._orig_index_file
465 return self._orig_index_file
464 return self.index_file
466 return self.index_file
465
467
466 @property
468 @property
467 def is_delaying(self):
469 def is_delaying(self):
468 """is the revlog is currently delaying the visibility of written data?
470 """is the revlog is currently delaying the visibility of written data?
469
471
470 The delaying mechanism can be either in-memory or written on disk in a
472 The delaying mechanism can be either in-memory or written on disk in a
471 side-file."""
473 side-file."""
472 return (self._delay_buffer is not None) or (
474 return (self._delay_buffer is not None) or (
473 self._orig_index_file is not None
475 self._orig_index_file is not None
474 )
476 )
475
477
476 # Derived from index values.
478 # Derived from index values.
477
479
478 def start(self, rev):
480 def start(self, rev):
479 """the offset of the data chunk for this revision"""
481 """the offset of the data chunk for this revision"""
480 return int(self.index[rev][0] >> 16)
482 return int(self.index[rev][0] >> 16)
481
483
482 def length(self, rev):
484 def length(self, rev):
483 """the length of the data chunk for this revision"""
485 """the length of the data chunk for this revision"""
484 return self.index[rev][1]
486 return self.index[rev][1]
485
487
486 def end(self, rev):
488 def end(self, rev):
487 """the end of the data chunk for this revision"""
489 """the end of the data chunk for this revision"""
488 return self.start(rev) + self.length(rev)
490 return self.start(rev) + self.length(rev)
489
491
490 def deltaparent(self, rev):
492 def deltaparent(self, rev):
491 """return deltaparent of the given revision"""
493 """return deltaparent of the given revision"""
492 base = self.index[rev][3]
494 base = self.index[rev][3]
493 if base == rev:
495 if base == rev:
494 return nullrev
496 return nullrev
495 elif self.delta_config.general_delta:
497 elif self.delta_config.general_delta:
496 return base
498 return base
497 else:
499 else:
498 return rev - 1
500 return rev - 1
499
501
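The base-field interpretation above condenses to a few lines; a worked sketch with hypothetical index values (nullrev is -1):

    def delta_parent(base_field, rev, general_delta):
        # base_field == rev marks a revision stored as a full snapshot
        if base_field == rev:
            return -1
        # general delta: the index names the base directly; otherwise
        # deltas always chain against the immediately preceding revision
        return base_field if general_delta else rev - 1

    assert delta_parent(7, 7, True) == -1   # full snapshot
    assert delta_parent(3, 7, True) == 3    # delta against an arbitrary base
    assert delta_parent(3, 7, False) == 6   # sequential delta chain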
500 def issnapshot(self, rev):
502 def issnapshot(self, rev):
501 """tells whether rev is a snapshot"""
503 """tells whether rev is a snapshot"""
502 if not self.delta_config.sparse_revlog:
504 if not self.delta_config.sparse_revlog:
503 return self.deltaparent(rev) == nullrev
505 return self.deltaparent(rev) == nullrev
504 elif hasattr(self.index, 'issnapshot'):
506 elif hasattr(self.index, 'issnapshot'):
505 # directly assign the method to cache the testing and access
507 # directly assign the method to cache the testing and access
506 self.issnapshot = self.index.issnapshot
508 self.issnapshot = self.index.issnapshot
507 return self.issnapshot(rev)
509 return self.issnapshot(rev)
508 if rev == nullrev:
510 if rev == nullrev:
509 return True
511 return True
510 entry = self.index[rev]
512 entry = self.index[rev]
511 base = entry[3]
513 base = entry[3]
512 if base == rev:
514 if base == rev:
513 return True
515 return True
514 if base == nullrev:
516 if base == nullrev:
515 return True
517 return True
516 p1 = entry[5]
518 p1 = entry[5]
517 while self.length(p1) == 0:
519 while self.length(p1) == 0:
518 b = self.deltaparent(p1)
520 b = self.deltaparent(p1)
519 if b == p1:
521 if b == p1:
520 break
522 break
521 p1 = b
523 p1 = b
522 p2 = entry[6]
524 p2 = entry[6]
523 while self.length(p2) == 0:
525 while self.length(p2) == 0:
524 b = self.deltaparent(p2)
526 b = self.deltaparent(p2)
525 if b == p2:
527 if b == p2:
526 break
528 break
527 p2 = b
529 p2 = b
528 if base == p1 or base == p2:
530 if base == p1 or base == p2:
529 return False
531 return False
530 return self.issnapshot(base)
532 return self.issnapshot(base)
531
533
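Stripped of the empty-revision skipping done by the two while loops, the rule above is: a revision is a snapshot when its delta chain bottoms out at a full text or nullrev without ever passing through one of its own parents. A simplified sketch under that assumption, over a toy index:

    def is_snapshot(base_of, parents_of, rev):
        # base_of[r]: delta base field; parents_of[r]: (p1, p2); -1 is nullrev
        if rev == -1 or base_of[rev] in (rev, -1):
            return True   # full text, or delta against nullrev
        if base_of[rev] in parents_of[rev]:
            return False  # ordinary delta against a parent
        return is_snapshot(base_of, parents_of, base_of[rev])

    base_of = {0: 0, 1: 0, 2: 0}
    parents_of = {0: (-1, -1), 1: (0, -1), 2: (1, -1)}
    assert not is_snapshot(base_of, parents_of, 1)  # delta against its parent
    assert is_snapshot(base_of, parents_of, 2)      # intermediate snapshot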
532 def _deltachain(self, rev, stoprev=None):
534 def _deltachain(self, rev, stoprev=None):
533 """Obtain the delta chain for a revision.
535 """Obtain the delta chain for a revision.
534
536
535 ``stoprev`` specifies a revision to stop at. If not specified, we
537 ``stoprev`` specifies a revision to stop at. If not specified, we
536 stop at the base of the chain.
538 stop at the base of the chain.
537
539
538 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
540 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
539 revs in ascending order and ``stopped`` is a bool indicating whether
541 revs in ascending order and ``stopped`` is a bool indicating whether
540 ``stoprev`` was hit.
542 ``stoprev`` was hit.
541 """
543 """
542 generaldelta = self.delta_config.general_delta
544 generaldelta = self.delta_config.general_delta
543 # Try C implementation.
545 # Try C implementation.
544 try:
546 try:
545 return self.index.deltachain(rev, stoprev, generaldelta)
547 return self.index.deltachain(rev, stoprev, generaldelta)
546 except AttributeError:
548 except AttributeError:
547 pass
549 pass
548
550
549 chain = []
551 chain = []
550
552
551 # Alias to prevent attribute lookup in tight loop.
553 # Alias to prevent attribute lookup in tight loop.
552 index = self.index
554 index = self.index
553
555
554 iterrev = rev
556 iterrev = rev
555 e = index[iterrev]
557 e = index[iterrev]
556 while iterrev != e[3] and iterrev != stoprev:
558 while iterrev != e[3] and iterrev != stoprev:
557 chain.append(iterrev)
559 chain.append(iterrev)
558 if generaldelta:
560 if generaldelta:
559 iterrev = e[3]
561 iterrev = e[3]
560 else:
562 else:
561 iterrev -= 1
563 iterrev -= 1
562 e = index[iterrev]
564 e = index[iterrev]
563
565
564 if iterrev == stoprev:
566 if iterrev == stoprev:
565 stopped = True
567 stopped = True
566 else:
568 else:
567 chain.append(iterrev)
569 chain.append(iterrev)
568 stopped = False
570 stopped = False
569
571
570 chain.reverse()
572 chain.reverse()
571 return chain, stopped
573 return chain, stopped
572
574
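Hedged usage sketch: rebuilding a full text from a chain means fetching the base and applying each delta in order, which the read path does via mdiff. The helper below is illustrative only and assumes the chain base is stored as a full text (rl stands for an _InnerRevlog):

    from mercurial import mdiff

    def rebuild_text(rl, rev):
        chain, stopped = rl._deltachain(rev)
        basetext = rl._chunk(chain[0])              # full text of the base
        deltas = [rl._chunk(r) for r in chain[1:]]  # deltas, oldest first
        return mdiff.patches(basetext, deltas)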
573 @util.propertycache
575 @util.propertycache
574 def _compressor(self):
576 def _compressor(self):
575 engine = util.compengines[self.feature_config.compression_engine]
577 engine = util.compengines[self.feature_config.compression_engine]
576 return engine.revlogcompressor(
578 return engine.revlogcompressor(
577 self.feature_config.compression_engine_options
579 self.feature_config.compression_engine_options
578 )
580 )
579
581
580 @util.propertycache
582 @util.propertycache
581 def _decompressor(self):
583 def _decompressor(self):
582 """the default decompressor"""
584 """the default decompressor"""
583 if self._default_compression_header is None:
585 if self._default_compression_header is None:
584 return None
586 return None
585 t = self._default_compression_header
587 t = self._default_compression_header
586 c = self._get_decompressor(t)
588 c = self._get_decompressor(t)
587 return c.decompress
589 return c.decompress
588
590
589 def _get_decompressor(self, t: bytes):
591 def _get_decompressor(self, t: bytes):
590 try:
592 try:
591 compressor = self._decompressors[t]
593 compressor = self._decompressors[t]
592 except KeyError:
594 except KeyError:
593 try:
595 try:
594 engine = util.compengines.forrevlogheader(t)
596 engine = util.compengines.forrevlogheader(t)
595 compressor = engine.revlogcompressor(
597 compressor = engine.revlogcompressor(
596 self.feature_config.compression_engine_options
598 self.feature_config.compression_engine_options
597 )
599 )
598 self._decompressors[t] = compressor
600 self._decompressors[t] = compressor
599 except KeyError:
601 except KeyError:
600 raise error.RevlogError(
602 raise error.RevlogError(
601 _(b'unknown compression type %s') % binascii.hexlify(t)
603 _(b'unknown compression type %s') % binascii.hexlify(t)
602 )
604 )
603 return compressor
605 return compressor
604
606
605 def compress(self, data: bytes) -> Tuple[bytes, bytes]:
607 def compress(self, data: bytes) -> Tuple[bytes, bytes]:
606 """Generate a possibly-compressed representation of data."""
608 """Generate a possibly-compressed representation of data."""
607 if not data:
609 if not data:
608 return b'', data
610 return b'', data
609
611
610 compressed = self._compressor.compress(data)
612 compressed = self._compressor.compress(data)
611
613
612 if compressed:
614 if compressed:
613 # The revlog compressor added the header in the returned data.
615 # The revlog compressor added the header in the returned data.
614 return b'', compressed
616 return b'', compressed
615
617
616 if data[0:1] == b'\0':
618 if data[0:1] == b'\0':
617 return b'', data
619 return b'', data
618 return b'u', data
620 return b'u', data
619
621
620 def decompress(self, data: bytes):
622 def decompress(self, data: bytes):
621 """Decompress a revlog chunk.
623 """Decompress a revlog chunk.
622
624
623 The chunk is expected to begin with a header identifying the
625 The chunk is expected to begin with a header identifying the
624 format type so it can be routed to an appropriate decompressor.
626 format type so it can be routed to an appropriate decompressor.
625 """
627 """
626 if not data:
628 if not data:
627 return data
629 return data
628
630
629 # Revlogs are read much more frequently than they are written and many
631 # Revlogs are read much more frequently than they are written and many
630 # chunks only take microseconds to decompress, so performance is
632 # chunks only take microseconds to decompress, so performance is
631 # important here.
633 # important here.
632 #
634 #
633 # We can make a few assumptions about revlogs:
635 # We can make a few assumptions about revlogs:
634 #
636 #
635 # 1) the majority of chunks will be compressed (as opposed to inline
637 # 1) the majority of chunks will be compressed (as opposed to inline
636 # raw data).
638 # raw data).
637 # 2) decompressing *any* data will likely be at least 10x slower than
639 # 2) decompressing *any* data will likely be at least 10x slower than
638 # returning raw inline data.
640 # returning raw inline data.
639 # 3) we want to prioritize common and officially supported compression
641 # 3) we want to prioritize common and officially supported compression
640 # engines
642 # engines
641 #
643 #
642 # It follows that we want to optimize for "decompress compressed data
644 # It follows that we want to optimize for "decompress compressed data
643 # when encoded with common and officially supported compression engines"
645 # when encoded with common and officially supported compression engines"
644 # case over "raw data" and "data encoded by less common or non-official
646 # case over "raw data" and "data encoded by less common or non-official
645 # compression engines." That is why we have the inline lookup first
647 # compression engines." That is why we have the inline lookup first
646 # followed by the compengines lookup.
648 # followed by the compengines lookup.
647 #
649 #
648 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
650 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
649 # compressed chunks. And this matters for changelog and manifest reads.
651 # compressed chunks. And this matters for changelog and manifest reads.
650 t = data[0:1]
652 t = data[0:1]
651
653
652 if t == b'x':
654 if t == b'x':
653 try:
655 try:
654 return _zlibdecompress(data)
656 return _zlibdecompress(data)
655 except zlib.error as e:
657 except zlib.error as e:
656 raise error.RevlogError(
658 raise error.RevlogError(
657 _(b'revlog decompress error: %s')
659 _(b'revlog decompress error: %s')
658 % stringutil.forcebytestr(e)
660 % stringutil.forcebytestr(e)
659 )
661 )
660 # '\0' is more common than 'u' so it goes first.
662 # '\0' is more common than 'u' so it goes first.
661 elif t == b'\0':
663 elif t == b'\0':
662 return data
664 return data
663 elif t == b'u':
665 elif t == b'u':
664 return util.buffer(data, 1)
666 return util.buffer(data, 1)
665
667
666 compressor = self._get_decompressor(t)
668 compressor = self._get_decompressor(t)
667
669
668 return compressor.decompress(data)
670 return compressor.decompress(data)
669
671
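The header convention can be exercised with a self-contained round-trip; zlib streams begin with b'x', which is exactly why that marker needs no separate header byte. This illustrates the convention, not revlog's actual compress path:

    import zlib

    def encode(data):
        comp = zlib.compress(data)
        if len(comp) < len(data):
            return comp          # zlib output already starts with b'x'
        if data[:1] == b'\0':
            return data          # raw data whose first byte marks itself
        return b'u' + data       # stored uncompressed, explicit marker

    def decode(chunk):
        t = chunk[:1]
        if t == b'x':
            return zlib.decompress(chunk)
        if t == b'\0':
            return chunk
        if t == b'u':
            return chunk[1:]
        raise ValueError('unknown chunk header: %r' % t)

    payload = b'hello revlog ' * 100
    assert decode(encode(payload)) == payload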
670 @contextlib.contextmanager
672 @contextlib.contextmanager
671 def reading(self):
673 def reading(self):
672 """Context manager that keeps data and sidedata files open for reading"""
674 """Context manager that keeps data and sidedata files open for reading"""
673 if len(self.index) == 0:
675 if len(self.index) == 0:
674 yield # nothing to be read
676 yield # nothing to be read
675 elif self._delay_buffer is not None and self.inline:
677 elif self._delay_buffer is not None and self.inline:
676 msg = "revlog with delayed write should not be inline"
678 msg = "revlog with delayed write should not be inline"
677 raise error.ProgrammingError(msg)
679 raise error.ProgrammingError(msg)
678 else:
680 else:
679 with self._segmentfile.reading():
681 with self._segmentfile.reading():
680 with self._segmentfile_sidedata.reading():
682 with self._segmentfile_sidedata.reading():
681 yield
683 yield
682
684
683 @property
685 @property
684 def is_writing(self):
686 def is_writing(self):
685 """True is a writing context is open"""
687 """True is a writing context is open"""
686 return self._writinghandles is not None
688 return self._writinghandles is not None
687
689
688 @property
690 @property
689 def is_open(self):
691 def is_open(self):
690 """True if any file handle is being held
692 """True if any file handle is being held
691
693
692 Used for assert and debug in the python code"""
694 Used for assert and debug in the python code"""
693 return self._segmentfile.is_open or self._segmentfile_sidedata.is_open
695 return self._segmentfile.is_open or self._segmentfile_sidedata.is_open
694
696
695 @contextlib.contextmanager
697 @contextlib.contextmanager
696 def writing(self, transaction, data_end=None, sidedata_end=None):
698 def writing(self, transaction, data_end=None, sidedata_end=None):
697 """Open the revlog files for writing
699 """Open the revlog files for writing
698
700
699 Adding content to a revlog should be done within such a context.
701 Adding content to a revlog should be done within such a context.
700 """
702 """
701 if self.is_writing:
703 if self.is_writing:
702 yield
704 yield
703 else:
705 else:
704 ifh = dfh = sdfh = None
706 ifh = dfh = sdfh = None
705 try:
707 try:
706 r = len(self.index)
708 r = len(self.index)
707 # opening the data file.
709 # opening the data file.
708 dsize = 0
710 dsize = 0
709 if r:
711 if r:
710 dsize = self.end(r - 1)
712 dsize = self.end(r - 1)
711 dfh = None
713 dfh = None
712 if not self.inline:
714 if not self.inline:
713 try:
715 try:
714 dfh = self.opener(self.data_file, mode=b"r+")
716 dfh = self.opener(self.data_file, mode=b"r+")
715 if data_end is None:
717 if data_end is None:
716 dfh.seek(0, os.SEEK_END)
718 dfh.seek(0, os.SEEK_END)
717 else:
719 else:
718 dfh.seek(data_end, os.SEEK_SET)
720 dfh.seek(data_end, os.SEEK_SET)
719 except FileNotFoundError:
721 except FileNotFoundError:
720 dfh = self.opener(self.data_file, mode=b"w+")
722 dfh = self.opener(self.data_file, mode=b"w+")
721 transaction.add(self.data_file, dsize)
723 transaction.add(self.data_file, dsize)
722 if self.sidedata_file is not None:
724 if self.sidedata_file is not None:
723 assert sidedata_end is not None
725 assert sidedata_end is not None
724 # revlog-v2 does not inline, help Pytype
726 # revlog-v2 does not inline, help Pytype
725 assert dfh is not None
727 assert dfh is not None
726 try:
728 try:
727 sdfh = self.opener(self.sidedata_file, mode=b"r+")
729 sdfh = self.opener(self.sidedata_file, mode=b"r+")
728 dfh.seek(sidedata_end, os.SEEK_SET)
730 dfh.seek(sidedata_end, os.SEEK_SET)
729 except FileNotFoundError:
731 except FileNotFoundError:
730 sdfh = self.opener(self.sidedata_file, mode=b"w+")
732 sdfh = self.opener(self.sidedata_file, mode=b"w+")
731 transaction.add(self.sidedata_file, sidedata_end)
733 transaction.add(self.sidedata_file, sidedata_end)
732
734
733 # opening the index file.
735 # opening the index file.
734 isize = r * self.index.entry_size
736 isize = r * self.index.entry_size
735 ifh = self.__index_write_fp()
737 ifh = self.__index_write_fp()
736 if self.inline:
738 if self.inline:
737 transaction.add(self.index_file, dsize + isize)
739 transaction.add(self.index_file, dsize + isize)
738 else:
740 else:
739 transaction.add(self.index_file, isize)
741 transaction.add(self.index_file, isize)
740 # exposing all file handles for writing.
742 # exposing all file handles for writing.
741 self._writinghandles = (ifh, dfh, sdfh)
743 self._writinghandles = (ifh, dfh, sdfh)
742 self._segmentfile.writing_handle = ifh if self.inline else dfh
744 self._segmentfile.writing_handle = ifh if self.inline else dfh
743 self._segmentfile_sidedata.writing_handle = sdfh
745 self._segmentfile_sidedata.writing_handle = sdfh
744 yield
746 yield
745 finally:
747 finally:
746 self._writinghandles = None
748 self._writinghandles = None
747 self._segmentfile.writing_handle = None
749 self._segmentfile.writing_handle = None
748 self._segmentfile_sidedata.writing_handle = None
750 self._segmentfile_sidedata.writing_handle = None
749 if dfh is not None:
751 if dfh is not None:
750 dfh.close()
752 dfh.close()
751 if sdfh is not None:
753 if sdfh is not None:
752 sdfh.close()
754 sdfh.close()
753 # closing the index file last to avoid exposing references to
755 # closing the index file last to avoid exposing references to
754 # potentially unflushed data content.
756 # potentially unflushed data content.
755 if ifh is not None:
757 if ifh is not None:
756 ifh.close()
758 ifh.close()
757
759
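The handle bookkeeping in `writing` follows a common transactional shape: record the current file sizes with the transaction (so an abort can truncate back), expose the handles for the duration of the block, and close them in a finally. A self-contained toy of that shape (ToyTransaction is a stand-in, not Mercurial's transaction):

    import contextlib

    class ToyTransaction:
        def __init__(self):
            self.entries = []          # (path, offset) to truncate to on abort

        def add(self, path, offset):
            self.entries.append((path, offset))

        def abort(self):
            for path, offset in self.entries:
                with open(path, 'r+b') as fh:
                    fh.truncate(offset)

    @contextlib.contextmanager
    def writing(path, tr):
        fh = open(path, 'ab')          # append mode: positioned at EOF
        try:
            tr.add(path, fh.tell())    # pre-write size, for rollback
            yield fh
        finally:
            fh.close()                 # never leak the handle

    tr = ToyTransaction()
    with writing('toy.d', tr) as fh:
        fh.write(b'new revision data')
    tr.abort()                         # truncates toy.d back to its old size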
758 def __index_write_fp(self, index_end=None):
760 def __index_write_fp(self, index_end=None):
759 """internal method to open the index file for writing
761 """internal method to open the index file for writing
760
762
761 You should not use this directly; use `_writing` instead
763 You should not use this directly; use `_writing` instead
762 """
764 """
763 try:
765 try:
764 if self._delay_buffer is None:
766 if self._delay_buffer is None:
765 f = self.opener(
767 f = self.opener(
766 self.index_file,
768 self.index_file,
767 mode=b"r+",
769 mode=b"r+",
768 checkambig=self.data_config.check_ambig,
770 checkambig=self.data_config.check_ambig,
769 )
771 )
770 else:
772 else:
771 # check_ambig affects the way we open the file for writing; however,
773 # check_ambig affects the way we open the file for writing; however,
772 # here we do not actually open a file for writing, as writes
774 # here we do not actually open a file for writing, as writes
773 # will append to a delay_buffer. So check_ambig is not
775 # will append to a delay_buffer. So check_ambig is not
774 # meaningful and is unneeded here.
776 # meaningful and is unneeded here.
775 f = randomaccessfile.appender(
777 f = randomaccessfile.appender(
776 self.opener, self.index_file, b"r+", self._delay_buffer
778 self.opener, self.index_file, b"r+", self._delay_buffer
777 )
779 )
778 if index_end is None:
780 if index_end is None:
779 f.seek(0, os.SEEK_END)
781 f.seek(0, os.SEEK_END)
780 else:
782 else:
781 f.seek(index_end, os.SEEK_SET)
783 f.seek(index_end, os.SEEK_SET)
782 return f
784 return f
783 except FileNotFoundError:
785 except FileNotFoundError:
784 if self._delay_buffer is None:
786 if self._delay_buffer is None:
785 return self.opener(
787 return self.opener(
786 self.index_file,
788 self.index_file,
787 mode=b"w+",
789 mode=b"w+",
788 checkambig=self.data_config.check_ambig,
790 checkambig=self.data_config.check_ambig,
789 )
791 )
790 else:
792 else:
791 return randomaccessfile.appender(
793 return randomaccessfile.appender(
792 self.opener, self.index_file, b"w+", self._delay_buffer
794 self.opener, self.index_file, b"w+", self._delay_buffer
793 )
795 )
794
796
795 def __index_new_fp(self):
797 def __index_new_fp(self):
796 """internal method to create a new index file for writing
798 """internal method to create a new index file for writing
797
799
798 You should not use this unless you are upgrading from an inline revlog
800 You should not use this unless you are upgrading from an inline revlog
799 """
801 """
800 return self.opener(
802 return self.opener(
801 self.index_file,
803 self.index_file,
802 mode=b"w",
804 mode=b"w",
803 checkambig=self.data_config.check_ambig,
805 checkambig=self.data_config.check_ambig,
804 )
806 )
805
807
806 def split_inline(self, tr, header, new_index_file_path=None):
808 def split_inline(self, tr, header, new_index_file_path=None):
807 """split the data of an inline revlog into an index and a data file"""
809 """split the data of an inline revlog into an index and a data file"""
808 assert self._delay_buffer is None
810 assert self._delay_buffer is None
809 existing_handles = False
811 existing_handles = False
810 if self._writinghandles is not None:
812 if self._writinghandles is not None:
811 existing_handles = True
813 existing_handles = True
812 fp = self._writinghandles[0]
814 fp = self._writinghandles[0]
813 fp.flush()
815 fp.flush()
814 fp.close()
816 fp.close()
815 # We can't use the cached file handle after close(). So prevent
817 # We can't use the cached file handle after close(). So prevent
816 # its usage.
818 # its usage.
817 self._writinghandles = None
819 self._writinghandles = None
818 self._segmentfile.writing_handle = None
820 self._segmentfile.writing_handle = None
819 # No need to deal with the sidedata writing handle as it is only
821 # No need to deal with the sidedata writing handle as it is only
820 # relevant for revlog-v2, which is never inline, so this code is
822 # relevant for revlog-v2, which is never inline, so this code is
821 # not reached
823 # not reached
822
824
823 new_dfh = self.opener(self.data_file, mode=b"w+")
825 new_dfh = self.opener(self.data_file, mode=b"w+")
824 new_dfh.truncate(0) # drop any potentially existing data
826 new_dfh.truncate(0) # drop any potentially existing data
825 try:
827 try:
826 with self.reading():
828 with self.reading():
827 for r in range(len(self.index)):
829 for r in range(len(self.index)):
828 new_dfh.write(self.get_segment_for_revs(r, r)[1])
830 new_dfh.write(self.get_segment_for_revs(r, r)[1])
829 new_dfh.flush()
831 new_dfh.flush()
830
832
831 if new_index_file_path is not None:
833 if new_index_file_path is not None:
832 self.index_file = new_index_file_path
834 self.index_file = new_index_file_path
833 with self.__index_new_fp() as fp:
835 with self.__index_new_fp() as fp:
834 self.inline = False
836 self.inline = False
835 for i in range(len(self.index)):
837 for i in range(len(self.index)):
836 e = self.index.entry_binary(i)
838 e = self.index.entry_binary(i)
837 if i == 0:
839 if i == 0:
838 packed_header = self.index.pack_header(header)
840 packed_header = self.index.pack_header(header)
839 e = packed_header + e
841 e = packed_header + e
840 fp.write(e)
842 fp.write(e)
841
843
842 # If we don't use side-write, the temp file replaces the real
844 # If we don't use side-write, the temp file replaces the real
843 # index when we exit the context manager
845 # index when we exit the context manager
844
846
845 self._segmentfile = randomaccessfile.randomaccessfile(
847 self._segmentfile = randomaccessfile.randomaccessfile(
846 self.opener,
848 self.opener,
847 self.data_file,
849 self.data_file,
848 self.data_config.chunk_cache_size,
850 self.data_config.chunk_cache_size,
849 )
851 )
850
852
851 if existing_handles:
853 if existing_handles:
852 # switched from inline to conventional; reopen the index
854 # switched from inline to conventional; reopen the index
853 ifh = self.__index_write_fp()
855 ifh = self.__index_write_fp()
854 self._writinghandles = (ifh, new_dfh, None)
856 self._writinghandles = (ifh, new_dfh, None)
855 self._segmentfile.writing_handle = new_dfh
857 self._segmentfile.writing_handle = new_dfh
856 new_dfh = None
858 new_dfh = None
857 # No need to deal with the sidedata writing handle as it is only
859 # No need to deal with the sidedata writing handle as it is only
858 # relevant for revlog-v2, which is never inline, so this code is
860 # relevant for revlog-v2, which is never inline, so this code is
859 # not reached
861 # not reached
860 finally:
862 finally:
861 if new_dfh is not None:
863 if new_dfh is not None:
862 new_dfh.close()
864 new_dfh.close()
863 return self.index_file
865 return self.index_file
864
866
865 def get_segment_for_revs(self, startrev, endrev):
867 def get_segment_for_revs(self, startrev, endrev):
866 """Obtain a segment of raw data corresponding to a range of revisions.
868 """Obtain a segment of raw data corresponding to a range of revisions.
867
869
868 Accepts the start and end revisions and an optional already-open
870 Accepts the start and end revisions and an optional already-open
869 file handle to be used for reading. If the file handle is used, its
871 file handle to be used for reading. If the file handle is used, its
870 seek position will not be preserved.
872 seek position will not be preserved.
871
873
872 Requests for data may be satisfied by a cache.
874 Requests for data may be satisfied by a cache.
873
875
874 Returns a 2-tuple of (offset, data) for the requested range of
876 Returns a 2-tuple of (offset, data) for the requested range of
875 revisions. Offset is the integer offset from the beginning of the
877 revisions. Offset is the integer offset from the beginning of the
876 revlog and data is a str or buffer of the raw byte data.
878 revlog and data is a str or buffer of the raw byte data.
877
879
878 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
880 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
879 to determine where each revision's data begins and ends.
881 to determine where each revision's data begins and ends.
880
882
881 API: we should consider making this a private part of the InnerRevlog
883 API: we should consider making this a private part of the InnerRevlog
882 at some point.
884 at some point.
883 """
885 """
884 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
886 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
885 # (functions are expensive).
887 # (functions are expensive).
886 index = self.index
888 index = self.index
887 istart = index[startrev]
889 istart = index[startrev]
888 start = int(istart[0] >> 16)
890 start = int(istart[0] >> 16)
889 if startrev == endrev:
891 if startrev == endrev:
890 end = start + istart[1]
892 end = start + istart[1]
891 else:
893 else:
892 iend = index[endrev]
894 iend = index[endrev]
893 end = int(iend[0] >> 16) + iend[1]
895 end = int(iend[0] >> 16) + iend[1]
894
896
895 if self.inline:
897 if self.inline:
896 start += (startrev + 1) * self.index.entry_size
898 start += (startrev + 1) * self.index.entry_size
897 end += (endrev + 1) * self.index.entry_size
899 end += (endrev + 1) * self.index.entry_size
898 length = end - start
900 length = end - start
899
901
900 return start, self._segmentfile.read_chunk(start, length)
902 return start, self._segmentfile.read_chunk(start, length)
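
    # Illustration (editor's sketch, not part of the original source): in an
    # inline revlog each revision's index entry is interleaved with its data,
    # so a "pure data" offset must be shifted by one entry size per revision
    # seen so far; this is the same arithmetic as the ``self.inline`` branch
    # above. With an illustrative entry size:
    #
    #     >>> entry_size = 64  # illustrative; real sizes depend on the format
    #     >>> def inline_offset(data_offset, rev):
    #     ...     return data_offset + (rev + 1) * entry_size
    #     >>> inline_offset(0, 0)  # data of rev 0 sits after its index entry
    #     64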

    def _chunk(self, rev):
        """Obtain a single decompressed chunk for a revision.

        Accepts an integer revision. The seek position of the underlying
        file will not be preserved.

        Returns a str holding uncompressed data for the requested revision.
        """
        if self._uncompressed_chunk_cache is not None:
            uncomp = self._uncompressed_chunk_cache.get(rev)
            if uncomp is not None:
                return uncomp

        compression_mode = self.index[rev][10]
        data = self.get_segment_for_revs(rev, rev)[1]
        if compression_mode == COMP_MODE_PLAIN:
            uncomp = data
        elif compression_mode == COMP_MODE_DEFAULT:
            uncomp = self._decompressor(data)
        elif compression_mode == COMP_MODE_INLINE:
            uncomp = self.decompress(data)
        else:
            msg = b'unknown compression mode %d'
            msg %= compression_mode
            raise error.RevlogError(msg)
        if self._uncompressed_chunk_cache is not None:
            self._uncompressed_chunk_cache.insert(rev, uncomp, cost=len(uncomp))
        return uncomp
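
    # Illustration (editor's sketch): field 10 of an index entry records how
    # that chunk was compressed, and every reader dispatches on it as above.
    # A toy dispatcher of the same shape (constant values are illustrative;
    # the real ones live in mercurial/revlogutils/constants.py):
    #
    #     >>> PLAIN, DEFAULT, INLINE = 0, 1, 2
    #     >>> def decode(mode, data):
    #     ...     if mode == PLAIN:
    #     ...         return data
    #     ...     if mode == DEFAULT:
    #     ...         return b'<docket-engine>'    # pre-negotiated engine
    #     ...     if mode == INLINE:
    #     ...         return b'<self-described>'   # chunk carries a header
    #     ...     raise ValueError('unknown compression mode %d' % mode)
    #     >>> decode(PLAIN, b'raw')
    #     b'raw'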

    def _chunks(self, revs, targetsize=None):
        """Obtain decompressed chunks for the specified revisions.

        Accepts an iterable of numeric revisions that are assumed to be in
        ascending order. The seek position of the underlying file will not
        be preserved.

        This function is similar to calling ``self._chunk()`` multiple times,
        but is faster.

        Returns a list with decompressed data for each requested revision.
        """
        if not revs:
            return []
        start = self.start
        length = self.length
        inline = self.inline
        iosize = self.index.entry_size
        buffer = util.buffer

        fetched_revs = []
        fadd = fetched_revs.append

        chunks = []
        ladd = chunks.append

        if self._uncompressed_chunk_cache is None:
            fetched_revs = revs
        else:
            for rev in revs:
                cached_value = self._uncompressed_chunk_cache.get(rev)
                if cached_value is None:
                    fadd(rev)
                else:
                    ladd((rev, cached_value))

        if not fetched_revs:
            slicedchunks = ()
        elif not self.data_config.with_sparse_read:
            slicedchunks = (fetched_revs,)
        else:
            slicedchunks = deltautil.slicechunk(
                self,
                fetched_revs,
                targetsize=targetsize,
            )

        for revschunk in slicedchunks:
            firstrev = revschunk[0]
            # Skip trailing revisions with empty diff
            for lastrev in revschunk[::-1]:
                if length(lastrev) != 0:
                    break

            try:
                offset, data = self.get_segment_for_revs(firstrev, lastrev)
            except OverflowError:
                # issue4215 - we can't cache a run of chunks greater than
                # 2G on Windows
                for rev in revschunk:
                    ladd((rev, self._chunk(rev)))
                # the batched segment could not be read, so the per-revision
                # fallback above already produced the chunks; skip the
                # batched decompression below
                continue

            decomp = self.decompress
            # self._decompressor might be None, but will not be used in that case
            def_decomp = self._decompressor
            for rev in revschunk:
                chunkstart = start(rev)
                if inline:
                    chunkstart += (rev + 1) * iosize
                chunklength = length(rev)
                comp_mode = self.index[rev][10]
                c = buffer(data, chunkstart - offset, chunklength)
                if comp_mode == COMP_MODE_PLAIN:
                    pass  # raw data can be used as-is
                elif comp_mode == COMP_MODE_INLINE:
                    c = decomp(c)
                elif comp_mode == COMP_MODE_DEFAULT:
                    c = def_decomp(c)
                else:
                    msg = b'unknown compression mode %d'
                    msg %= comp_mode
                    raise error.RevlogError(msg)
                ladd((rev, c))
                if self._uncompressed_chunk_cache is not None:
                    self._uncompressed_chunk_cache.insert(rev, c, len(c))

        chunks.sort()
        return [x[1] for x in chunks]
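
    # Illustration (editor's sketch): ``util.buffer(data, start, length)``
    # above slices one large read without copying, in the same spirit as a
    # memoryview:
    #
    #     >>> segment = b'aaabbbbcc'    # one read covering several chunks
    #     >>> view = memoryview(segment)
    #     >>> bytes(view[3:7])          # chunk of the middle revision
    #     b'bbbb'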

    def raw_text(self, node, rev):
        """return the possibly unvalidated rawtext for a revision

        returns (rev, rawtext, validated)
        """

        # revision in the cache (could be useful to apply delta)
        cachedrev = None
        # An intermediate text to apply deltas to
        basetext = None

        # Check if we have the entry in cache
        # The cache entry looks like (node, rev, rawtext)
        if self._revisioncache:
            cachedrev = self._revisioncache[1]

        chain, stopped = self._deltachain(rev, stoprev=cachedrev)
        if stopped:
            basetext = self._revisioncache[2]

        # drop cache to save memory, the caller is expected to
        # update self._inner._revisioncache after validating the text
        self._revisioncache = None

        targetsize = None
        rawsize = self.index[rev][2]
        if 0 <= rawsize:
            targetsize = 4 * rawsize

        if self._uncompressed_chunk_cache is not None:
            # dynamically update the uncompressed_chunk_cache size to the
            # largest revision we saw in this revlog.
            factor = self.data_config.uncompressed_cache_factor
            candidate_size = rawsize * factor
            if candidate_size > self._uncompressed_chunk_cache.maxcost:
                self._uncompressed_chunk_cache.maxcost = candidate_size

        bins = self._chunks(chain, targetsize=targetsize)
        if basetext is None:
            basetext = bytes(bins[0])
            bins = bins[1:]

        rawtext = mdiff.patches(basetext, bins)
        del basetext  # let us have a chance to free memory early
        return (rev, rawtext, False)
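
    # Illustration (editor's sketch): ``mdiff.patches(basetext, bins)`` folds
    # a base text and its chain of deltas into the final raw text. Reduced to
    # its overall shape:
    #
    #     >>> def fold(base, deltas, apply_one):
    ...     # ``apply_one`` stands in for real binary patch application
    #     ...     text = base
    #     ...     for delta in deltas:
    #     ...         text = apply_one(text, delta)
    #     ...     return text
    #     >>> fold(b'v1', [b'-a', b'-b'], lambda t, d: t + d[1:])
    #     b'v1ab'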

    def sidedata(self, rev, sidedata_end):
        """Return the sidedata for a given revision number."""
        index_entry = self.index[rev]
        sidedata_offset = index_entry[8]
        sidedata_size = index_entry[9]

        if self.inline:
            sidedata_offset += self.index.entry_size * (1 + rev)
        if sidedata_size == 0:
            return {}

        if sidedata_end < sidedata_offset + sidedata_size:
            filename = self.sidedata_file
            end = sidedata_end
            offset = sidedata_offset
            length = sidedata_size
            m = FILE_TOO_SHORT_MSG % (filename, length, offset, end)
            raise error.RevlogError(m)

        comp_segment = self._segmentfile_sidedata.read_chunk(
            sidedata_offset, sidedata_size
        )

        comp = self.index[rev][11]
        if comp == COMP_MODE_PLAIN:
            segment = comp_segment
        elif comp == COMP_MODE_DEFAULT:
            segment = self._decompressor(comp_segment)
        elif comp == COMP_MODE_INLINE:
            segment = self.decompress(comp_segment)
        else:
            msg = b'unknown compression mode %d'
            msg %= comp
            raise error.RevlogError(msg)

        sidedata = sidedatautil.deserialize_sidedata(segment)
        return sidedata

    def write_entry(
        self,
        transaction,
        entry,
        data,
        link,
        offset,
        sidedata,
        sidedata_offset,
        index_end,
        data_end,
        sidedata_end,
    ):
        # Files opened in a+ mode have inconsistent behavior on various
        # platforms. Windows requires that a file positioning call be made
        # when the file handle transitions between reads and writes. See
        # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
        # platforms, Python or the platform itself can be buggy. Some versions
        # of Solaris have been observed to not append at the end of the file
        # if the file was seeked to before the end. See issue4943 for more.
        #
        # We work around this issue by inserting a seek() before writing.
        # Note: This is likely not necessary on Python 3. However, because
        # the file handle is reused for reads and may be seeked there, we need
        # to be careful before changing this.
        if self._writinghandles is None:
            msg = b'adding revision outside `revlog._writing` context'
            raise error.ProgrammingError(msg)
        ifh, dfh, sdfh = self._writinghandles
        if index_end is None:
            ifh.seek(0, os.SEEK_END)
        else:
            ifh.seek(index_end, os.SEEK_SET)
        if dfh:
            if data_end is None:
                dfh.seek(0, os.SEEK_END)
            else:
                dfh.seek(data_end, os.SEEK_SET)
        if sdfh:
            sdfh.seek(sidedata_end, os.SEEK_SET)

        curr = len(self.index) - 1
        if not self.inline:
            transaction.add(self.data_file, offset)
            if self.sidedata_file:
                transaction.add(self.sidedata_file, sidedata_offset)
            transaction.add(self.canonical_index_file, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            if sidedata:
                sdfh.write(sidedata)
            if self._delay_buffer is None:
                ifh.write(entry)
            else:
                self._delay_buffer.append(entry)
        elif self._delay_buffer is not None:
            msg = b'invalid delayed write on inline revlog'
            raise error.ProgrammingError(msg)
        else:
            offset += curr * self.index.entry_size
            transaction.add(self.canonical_index_file, offset)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
        return (
            ifh.tell(),
            dfh.tell() if dfh else None,
            sdfh.tell() if sdfh else None,
        )
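
    # Illustration (editor's sketch): the explicit seek() calls above are the
    # portable version of "position before you write", since append-mode
    # handles cannot be trusted across platforms:
    #
    #     >>> import io, os
    #     >>> fh = io.BytesIO(b'existing')
    #     >>> _ = fh.seek(0, os.SEEK_END)   # position explicitly
    #     >>> _ = fh.write(b'+new')
    #     >>> fh.getvalue()
    #     b'existing+new'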

    def _divert_index(self):
        index_file = self.index_file
        # when we encounter a legacy inline-changelog, split it. However it is
        # important to use the expected filename for pending content
        # (<radix>.a) otherwise hooks won't see the content of the
        # pending transaction.
        if index_file.endswith(b'.s'):
            index_file = self.index_file[:-2]
        return index_file + b'.a'

    def delay(self):
        assert not self.is_open
        if self.inline:
            msg = "revlog with delayed write should not be inline"
            raise error.ProgrammingError(msg)
        if self._delay_buffer is not None or self._orig_index_file is not None:
            # delay or divert already in place
            return None
        elif len(self.index) == 0:
            self._orig_index_file = self.index_file
            self.index_file = self._divert_index()
            assert self._orig_index_file is not None
            assert self.index_file is not None
            if self.opener.exists(self.index_file):
                self.opener.unlink(self.index_file)
            return self.index_file
        else:
            self._delay_buffer = []
            return None

    def write_pending(self):
        assert not self.is_open
        if self.inline:
            msg = "revlog with delayed write should not be inline"
            raise error.ProgrammingError(msg)
        if self._orig_index_file is not None:
            return None, True
        any_pending = False
        pending_index_file = self._divert_index()
        if self.opener.exists(pending_index_file):
            self.opener.unlink(pending_index_file)
        util.copyfile(
            self.opener.join(self.index_file),
            self.opener.join(pending_index_file),
        )
        if self._delay_buffer:
            with self.opener(pending_index_file, b'r+') as ifh:
                ifh.seek(0, os.SEEK_END)
                ifh.write(b"".join(self._delay_buffer))
            any_pending = True
        self._delay_buffer = None
        self._orig_index_file = self.index_file
        self.index_file = pending_index_file
        return self.index_file, any_pending

    def finalize_pending(self):
        assert not self.is_open
        if self.inline:
            msg = "revlog with delayed write should not be inline"
            raise error.ProgrammingError(msg)

        delay = self._delay_buffer is not None
        divert = self._orig_index_file is not None

        if delay and divert:
            assert False, "unreachable"
        elif delay:
            if self._delay_buffer:
                with self.opener(self.index_file, b'r+') as ifh:
                    ifh.seek(0, os.SEEK_END)
                    ifh.write(b"".join(self._delay_buffer))
            self._delay_buffer = None
        elif divert:
            if self.opener.exists(self.index_file):
                self.opener.rename(
                    self.index_file,
                    self._orig_index_file,
                    checkambig=True,
                )
            self.index_file = self._orig_index_file
            self._orig_index_file = None
        else:
            msg = b"neither delay nor divert found on this revlog"
            raise error.ProgrammingError(msg)
        return self.canonical_index_file
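
    # Illustration (editor's sketch): ``delay``/``write_pending``/
    # ``finalize_pending`` keep hooks from reading half-committed index data.
    # ``delay`` picks one of two strategies, matching the branches above:
    # divert writes to a pending '<radix>.i.a' file when the revlog is still
    # empty, or buffer new index entries in memory otherwise:
    #
    #     >>> def pick_strategy(num_revs):
    #     ...     return 'divert' if num_revs == 0 else 'buffer'
    #     >>> pick_strategy(0), pick_strategy(12)
    #     ('divert', 'buffer')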


class revlog:
    """
    the underlying revision storage object

    A revlog consists of two parts, an index and the revision data.

    The index is a file with a fixed record size containing
    information on each revision, including its nodeid (hash), the
    nodeids of its parents, the position and offset of its data within
    the data file, and the revision it's based on. Finally, each entry
    contains a linkrev entry that can serve as a pointer to external
    data.

    The revision data itself is a linear collection of data chunks.
    Each chunk represents a revision and is usually represented as a
    delta against the previous chunk. To bound lookup time, runs of
    deltas are limited to about 2 times the length of the original
    version data. This makes retrieval of a version proportional to
    its size, or O(1) relative to the number of revisions.

    Both pieces of the revlog are written to in an append-only
    fashion, which means we never need to rewrite a file to insert or
    remove data, and can use some simple techniques to avoid the need
    for locking while reading.

    If checkambig, indexfile is opened with checkambig=True at
    writing, to avoid file stat ambiguity.

    If mmaplargeindex is True, and an mmapindexthreshold is set, the
    index will be mmapped rather than read if it is larger than the
    configured threshold.

    If censorable is True, the revlog can have censored revisions.

    If `upperboundcomp` is not None, this is the expected maximal gain from
    compression for the data content.

    `concurrencychecker` is an optional function that receives 3 arguments: a
    file handle, a filename, and an expected position. It should check whether
    the current position in the file handle is valid, and log/warn/fail (by
    raising).

    See mercurial/revlogutils/constants.py for details about the content of an
    index entry.
    """

    _flagserrorclass = error.RevlogError

    opener: vfsmod.vfs

    @staticmethod
    def is_inline_index(header_bytes):
        """Determine if a revlog is inline from the initial bytes of the index"""
        if len(header_bytes) == 0:
            return True

        header = INDEX_HEADER.unpack(header_bytes)[0]

        _format_flags = header & ~0xFFFF
        _format_version = header & 0xFFFF

        features = FEATURES_BY_VERSION[_format_version]
        return features[b'inline'](_format_flags)
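
    # Illustration (editor's sketch): the index header is one big-endian
    # 32-bit word, format flags in the high 16 bits and the format version
    # in the low 16 bits, which is exactly what the masking above separates:
    #
    #     >>> import struct
    #     >>> header = struct.unpack('>I', b'\x00\x01\x00\x01')[0]
    #     >>> hex(header & ~0xFFFF), header & 0xFFFF
    #     ('0x10000', 1)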

    _docket_file: Optional[bytes]

    def __init__(
        self,
        opener: vfsmod.vfs,
        target,
        radix,
        postfix=None,  # only exists for `tmpcensored` now
        checkambig=False,
        mmaplargeindex=False,
        censorable=False,
        upperboundcomp=None,
        persistentnodemap=False,
        concurrencychecker=None,
        trypending=False,
        try_split=False,
        canonical_parent_order=True,
        data_config=None,
        delta_config=None,
        feature_config=None,
        may_inline=True,  # may inline new revlog
    ):
        """
        create a revlog object

        opener is a function that abstracts the file opening operation
        and can be used to implement COW semantics or the like.

        `target`: a (KIND, ID) tuple that identifies the content stored in
        this revlog. It helps the rest of the code understand what the revlog
        is about without having to resort to heuristics and index filename
        analysis. Note that this must be reliably set by normal code, but
        that test, debug, or performance measurement code might not set this
        to an accurate value.
        """

        self.radix = radix

        self._docket_file = None
        self._indexfile = None
        self._datafile = None
        self._sidedatafile = None
        self._nodemap_file = None
        self.postfix = postfix
        self._trypending = trypending
        self._try_split = try_split
        self._may_inline = may_inline
        self.opener = opener
        if persistentnodemap:
            self._nodemap_file = nodemaputil.get_nodemap_file(self)

        assert target[0] in ALL_KINDS
        assert len(target) == 2
        self.target = target
        if feature_config is not None:
            self.feature_config = feature_config.copy()
        elif b'feature-config' in self.opener.options:
            self.feature_config = self.opener.options[b'feature-config'].copy()
        else:
            self.feature_config = FeatureConfig()
        self.feature_config.censorable = censorable
        self.feature_config.canonical_parent_order = canonical_parent_order
        if data_config is not None:
            self.data_config = data_config.copy()
        elif b'data-config' in self.opener.options:
            self.data_config = self.opener.options[b'data-config'].copy()
        else:
            self.data_config = DataConfig()
        self.data_config.check_ambig = checkambig
        self.data_config.mmap_large_index = mmaplargeindex
        if delta_config is not None:
            self.delta_config = delta_config.copy()
        elif b'delta-config' in self.opener.options:
            self.delta_config = self.opener.options[b'delta-config'].copy()
        else:
            self.delta_config = DeltaConfig()
        self.delta_config.upper_bound_comp = upperboundcomp

        # Maps rev to chain base rev.
        self._chainbasecache = util.lrucachedict(100)

        self.index = None
        self._docket = None
        self._nodemap_docket = None
        # Mapping of partial identifiers to full nodes.
        self._pcache = {}

        # other optional features

        # Make copy of flag processors so each revlog instance can support
        # custom flags.
        self._flagprocessors = dict(flagutil.flagprocessors)
        # prevent nesting of addgroup
        self._adding_group = None

        chunk_cache = self._loadindex()
        self._load_inner(chunk_cache)
        self._concurrencychecker = concurrencychecker
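
    # Illustration (editor's sketch): the three *_config attributes above all
    # resolve with the same precedence, explicit argument first, then the
    # opener options, then a fresh default:
    #
    #     >>> def resolve(explicit, options, key, default):
    #     ...     if explicit is not None:
    #     ...         return explicit
    #     ...     return options.get(key, default)
    #     >>> resolve(None, {b'data-config': 'from-options'},
    #     ...         b'data-config', 'default')
    #     'from-options'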

    def _init_opts(self):
        """process options (from above/config) to set up the associated default revlog mode

        These values might be affected when actually reading on-disk information.

        The relevant values are returned for use in _loadindex().

        * new_header:
            version header to use if we need to create a new revlog

        * mmapindexthreshold:
            minimal index size at which to start using mmap

        * force_nodemap:
            force the usage of a "development" version of the nodemap code
        """
        opts = self.opener.options

        if b'changelogv2' in opts and self.revlog_kind == KIND_CHANGELOG:
            new_header = CHANGELOGV2
            compute_rank = opts.get(b'changelogv2.compute-rank', True)
            self.feature_config.compute_rank = compute_rank
        elif b'revlogv2' in opts:
            new_header = REVLOGV2
        elif b'revlogv1' in opts:
            new_header = REVLOGV1
            if self._may_inline:
                new_header |= FLAG_INLINE_DATA
            if b'generaldelta' in opts:
                new_header |= FLAG_GENERALDELTA
        elif b'revlogv0' in self.opener.options:
            new_header = REVLOGV0
        else:
            new_header = REVLOG_DEFAULT_VERSION

        mmapindexthreshold = None
        if self.data_config.mmap_large_index:
            mmapindexthreshold = self.data_config.mmap_index_threshold
        if self.feature_config.enable_ellipsis:
            self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor

        # revlog v0 doesn't have flag processors
        for flag, processor in opts.get(b'flagprocessors', {}).items():
            flagutil.insertflagprocessor(flag, processor, self._flagprocessors)

        chunk_cache_size = self.data_config.chunk_cache_size
        if chunk_cache_size <= 0:
            raise error.RevlogError(
                _(b'revlog chunk cache size %r is not greater than 0')
                % chunk_cache_size
            )
        elif chunk_cache_size & (chunk_cache_size - 1):
            raise error.RevlogError(
                _(b'revlog chunk cache size %r is not a power of 2')
                % chunk_cache_size
            )
        force_nodemap = opts.get(b'devel-force-nodemap', False)
        return new_header, mmapindexthreshold, force_nodemap
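
    # Illustration (editor's sketch): ``n & (n - 1)`` clears the lowest set
    # bit of ``n``, so the expression is zero exactly when ``n`` is a power
    # of two; that is the whole chunk-cache-size check above:
    #
    #     >>> [n for n in range(1, 20) if n & (n - 1) == 0]
    #     [1, 2, 4, 8, 16]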

    def _get_data(self, filepath, mmap_threshold, size=None):
        """return a file content with or without mmap

        If the file is missing, return an empty string"""
        try:
            with self.opener(filepath) as fp:
                if mmap_threshold is not None:
                    file_size = self.opener.fstat(fp).st_size
                    if (
                        file_size >= mmap_threshold
                        and self.opener.is_mmap_safe(filepath)
                    ):
                        if size is not None:
                            # avoid potential mmap crash
                            size = min(file_size, size)
                        # TODO: should .close() to release resources without
                        # relying on Python GC
                        if size is None:
                            return util.buffer(util.mmapread(fp))
                        else:
                            return util.buffer(util.mmapread(fp, size))
                if size is None:
                    return fp.read()
                else:
                    return fp.read(size)
        except FileNotFoundError:
            return b''
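
    # Illustration (editor's sketch): mmap only pays off for large files, so
    # reads are gated on a size threshold just like above:
    #
    #     >>> def should_mmap(file_size, threshold):
    #     ...     return threshold is not None and file_size >= threshold
    #     >>> should_mmap(512, 65536), should_mmap(1 << 20, 65536)
    #     (False, True)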

    def get_streams(self, max_linkrev, force_inline=False):
        """return a list of streams that represent this revlog

        This is used by stream-clone to do bytes to bytes copies of a repository.

        This streams data for all revisions that refer to a changelog revision up
        to `max_linkrev`.

        If `force_inline` is set, it enforces that the stream will represent an inline revlog.

        It returns a list of three-tuples:

            [
                (filename, bytes_stream, stream_size),
                …
            ]
        """
        n = len(self)
        index = self.index
        while n > 0:
            linkrev = index[n - 1][4]
            if linkrev < max_linkrev:
                break
            # note: this loop will rarely go through multiple iterations, since
            # it only traverses commits created during the current streaming
            # pull operation.
            #
            # If this becomes a problem, using a binary search should cap the
            # runtime of this.
            n = n - 1
        if n == 0:
            # no data to send
            return []
        index_size = n * index.entry_size
        data_size = self.end(n - 1)

        # XXX we might have been split (or stripped) since the object was
        # initialized. We need to close this race too, e.g. by pre-opening
        # the files we feed to the revlog and never closing them before we
        # are done streaming.

        if self._inline:

            def get_stream():
                with self.opener(self._indexfile, mode=b"r") as fp:
                    yield None
                    size = index_size + data_size
                    if size <= 65536:
                        yield fp.read(size)
                    else:
                        yield from util.filechunkiter(fp, limit=size)

            inline_stream = get_stream()
            next(inline_stream)
            return [
                (self._indexfile, inline_stream, index_size + data_size),
            ]
        elif force_inline:

            def get_stream():
                with self.reading():
                    yield None

                    for rev in range(n):
                        idx = self.index.entry_binary(rev)
                        if rev == 0 and self._docket is None:
                            # re-inject the inline flag
                            header = self._format_flags
                            header |= self._format_version
                            header |= FLAG_INLINE_DATA
                            header = self.index.pack_header(header)
                            idx = header + idx
                        yield idx
                        yield self._inner.get_segment_for_revs(rev, rev)[1]

            inline_stream = get_stream()
            next(inline_stream)
            return [
                (self._indexfile, inline_stream, index_size + data_size),
            ]
        else:

            def get_index_stream():
                with self.opener(self._indexfile, mode=b"r") as fp:
                    yield None
                    if index_size <= 65536:
                        yield fp.read(index_size)
                    else:
                        yield from util.filechunkiter(fp, limit=index_size)

            def get_data_stream():
                with self._datafp() as fp:
                    yield None
                    if data_size <= 65536:
                        yield fp.read(data_size)
                    else:
                        yield from util.filechunkiter(fp, limit=data_size)

            index_stream = get_index_stream()
            next(index_stream)
            data_stream = get_data_stream()
            next(data_stream)
            return [
                (self._datafile, data_stream, data_size),
                (self._indexfile, index_stream, index_size),
            ]
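
    # Illustration (editor's sketch): each stream above yields ``None`` first
    # so that files are opened eagerly, inside the generator, while the bytes
    # still flow lazily; callers prime the generator with ``next()`` before
    # handing it out:
    #
    #     >>> def stream():
    #     ...     yield None      # resources are acquired by this point
    #     ...     yield b'payload'
    #     >>> s = stream()
    #     >>> next(s) is None     # prime; payload is still pending
    #     True
    #     >>> b''.join(s)
    #     b'payload'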

    def _loadindex(self, docket=None):
        new_header, mmapindexthreshold, force_nodemap = self._init_opts()

        if self.postfix is not None:
            entry_point = b'%s.i.%s' % (self.radix, self.postfix)
        elif self._trypending and self.opener.exists(b'%s.i.a' % self.radix):
            entry_point = b'%s.i.a' % self.radix
        elif self._try_split and self.opener.exists(self._split_index_file):
            entry_point = self._split_index_file
        else:
            entry_point = b'%s.i' % self.radix

        if docket is not None:
            self._docket = docket
            self._docket_file = entry_point
        else:
            self._initempty = True
            entry_data = self._get_data(entry_point, mmapindexthreshold)
            if len(entry_data) > 0:
                header = INDEX_HEADER.unpack(entry_data[:4])[0]
                self._initempty = False
            else:
                header = new_header

            self._format_flags = header & ~0xFFFF
            self._format_version = header & 0xFFFF

            supported_flags = SUPPORTED_FLAGS.get(self._format_version)
            if supported_flags is None:
                msg = _(b'unknown version (%d) in revlog %s')
                msg %= (self._format_version, self.display_id)
                raise error.RevlogError(msg)
            elif self._format_flags & ~supported_flags:
                msg = _(b'unknown flags (%#04x) in version %d revlog %s')
                display_flag = self._format_flags >> 16
                msg %= (display_flag, self._format_version, self.display_id)
                raise error.RevlogError(msg)

            features = FEATURES_BY_VERSION[self._format_version]
            self._inline = features[b'inline'](self._format_flags)
            self.delta_config.general_delta = features[b'generaldelta'](
                self._format_flags
            )
            self.feature_config.has_side_data = features[b'sidedata']

            if not features[b'docket']:
                self._indexfile = entry_point
                index_data = entry_data
            else:
                self._docket_file = entry_point
                if self._initempty:
                    self._docket = docketutil.default_docket(self, header)
                else:
                    self._docket = docketutil.parse_docket(
                        self, entry_data, use_pending=self._trypending
                    )

        if self._docket is not None:
            self._indexfile = self._docket.index_filepath()
            index_data = b''
            index_size = self._docket.index_end
            if index_size > 0:
                index_data = self._get_data(
                    self._indexfile, mmapindexthreshold, size=index_size
                )
                if len(index_data) < index_size:
                    msg = _(b'too few index data for %s: got %d, expected %d')
                    msg %= (self.display_id, len(index_data), index_size)
                    raise error.RevlogError(msg)

            self._inline = False
            # generaldelta implied by version 2 revlogs.
            self.delta_config.general_delta = True
            # the logic for persistent nodemap will be dealt with within the
            # main docket, so disable it for now.
            self._nodemap_file = None

        if self._docket is not None:
            self._datafile = self._docket.data_filepath()
            self._sidedatafile = self._docket.sidedata_filepath()
        elif self.postfix is None:
            self._datafile = b'%s.d' % self.radix
        else:
            self._datafile = b'%s.d.%s' % (self.radix, self.postfix)

        self.nodeconstants = sha1nodeconstants
        self.nullid = self.nodeconstants.nullid

        # sparse-revlog can't be on without general-delta (issue6056)
        if not self.delta_config.general_delta:
            self.delta_config.sparse_revlog = False

        self._storedeltachains = True

        devel_nodemap = (
            self._nodemap_file
            and force_nodemap
            and parse_index_v1_nodemap is not None
        )

        use_rust_index = False
        if rustrevlog is not None and self._nodemap_file is not None:
            # we would like to use the rust index in all cases, especially
            # because it is necessary for AncestorsIterator and LazyAncestors
            # since the 6.7 cycle.
            #
            # However, the performance impact of unconditionally building the
            # nodemap is currently a problem for repositories without a
            # persistent nodemap.
            use_rust_index = True

        self._parse_index = parse_index_v1
        if self._format_version == REVLOGV0:
            self._parse_index = revlogv0.parse_index_v0
        elif self._format_version == REVLOGV2:
            self._parse_index = parse_index_v2
        elif self._format_version == CHANGELOGV2:
            self._parse_index = parse_index_cl_v2
        elif devel_nodemap:
            self._parse_index = parse_index_v1_nodemap
        elif use_rust_index:
            self._parse_index = functools.partial(
                parse_index_v1_rust, default_header=new_header
            )
        try:
            d = self._parse_index(index_data, self._inline)
            index, chunkcache = d
            use_nodemap = (
                not self._inline
                and self._nodemap_file is not None
                and hasattr(index, 'update_nodemap_data')
            )
            if use_nodemap:
                nodemap_data = nodemaputil.persisted_data(self)
                if nodemap_data is not None:
                    docket = nodemap_data[0]
                    if (
                        len(d[0]) > docket.tip_rev
                        and d[0][docket.tip_rev][7] == docket.tip_node
                    ):
                        # no changelog tampering
                        self._nodemap_docket = docket
                        index.update_nodemap_data(*nodemap_data)
        except (ValueError, IndexError):
            raise error.RevlogError(
                _(b"index %s is corrupted") % self.display_id
            )
        self.index = index
        # revnum -> (chain-length, sum-delta-length)
        self._chaininfocache = util.lrucachedict(500)

        return chunkcache
1774
1776
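# --- Illustrative sketch (editorial, not part of revlog.py) ---
# The loading code above relies on a simple parser contract: whichever
# variant is selected, _parse_index is called as parse_index(index_data,
# inline) and must return an (index, chunkcache) pair.  A hypothetical
# stand-in honouring that shape:

def stub_parse_index(index_data, inline):
    # a real parser decodes index_data into index entries; the cache is
    # leftover raw data that callers may reuse
    return [], None

index, chunkcache = stub_parse_index(b'', False)
assert index == [] and chunkcache is None
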
1775 def _load_inner(self, chunk_cache):
1777 def _load_inner(self, chunk_cache):
1776 if self._docket is None:
1778 if self._docket is None:
1777 default_compression_header = None
1779 default_compression_header = None
1778 else:
1780 else:
1779 default_compression_header = self._docket.default_compression_header
1781 default_compression_header = self._docket.default_compression_header
1780
1782
1781 self._inner = _InnerRevlog(
1783 self._inner = _InnerRevlog(
1782 opener=self.opener,
1784 opener=self.opener,
1783 index=self.index,
1785 index=self.index,
1784 index_file=self._indexfile,
1786 index_file=self._indexfile,
1785 data_file=self._datafile,
1787 data_file=self._datafile,
1786 sidedata_file=self._sidedatafile,
1788 sidedata_file=self._sidedatafile,
1787 inline=self._inline,
1789 inline=self._inline,
1788 data_config=self.data_config,
1790 data_config=self.data_config,
1789 delta_config=self.delta_config,
1791 delta_config=self.delta_config,
1790 feature_config=self.feature_config,
1792 feature_config=self.feature_config,
1791 chunk_cache=chunk_cache,
1793 chunk_cache=chunk_cache,
1792 default_compression_header=default_compression_header,
1794 default_compression_header=default_compression_header,
1793 )
1795 )
1794
1796
1795 def get_revlog(self):
1797 def get_revlog(self):
1796 """simple function to mirror the API of other not-really-revlog APIs"""
1798 """simple function to mirror the API of other not-really-revlog APIs"""
1797 return self
1799 return self
1798
1800
1799 @util.propertycache
1801 @util.propertycache
1800 def revlog_kind(self):
1802 def revlog_kind(self):
1801 return self.target[0]
1803 return self.target[0]
1802
1804
1803 @util.propertycache
1805 @util.propertycache
1804 def display_id(self):
1806 def display_id(self):
1805 """The public-facing "ID" of the revlog that we use in messages"""
1807 """The public-facing "ID" of the revlog that we use in messages"""
1806 if self.revlog_kind == KIND_FILELOG:
1808 if self.revlog_kind == KIND_FILELOG:
1807 # Reference the file without the "data/" prefix, so it is familiar
1809 # Reference the file without the "data/" prefix, so it is familiar
1808 # to the user.
1810 # to the user.
1809 return self.target[1]
1811 return self.target[1]
1810 else:
1812 else:
1811 return self.radix
1813 return self.radix
1812
1814
1813 def _datafp(self, mode=b'r'):
1815 def _datafp(self, mode=b'r'):
1814 """file object for the revlog's data file"""
1816 """file object for the revlog's data file"""
1815 return self.opener(self._datafile, mode=mode)
1817 return self.opener(self._datafile, mode=mode)
1816
1818
1817 def tiprev(self):
1819 def tiprev(self):
1818 return len(self.index) - 1
1820 return len(self.index) - 1
1819
1821
1820 def tip(self):
1822 def tip(self):
1821 return self.node(self.tiprev())
1823 return self.node(self.tiprev())
1822
1824
1823 def __contains__(self, rev):
1825 def __contains__(self, rev):
1824 return 0 <= rev < len(self)
1826 return 0 <= rev < len(self)
1825
1827
1826 def __len__(self):
1828 def __len__(self):
1827 return len(self.index)
1829 return len(self.index)
1828
1830
1829 def __iter__(self):
1831 def __iter__(self) -> Iterator[int]:
1830 return iter(range(len(self)))
1832 return iter(range(len(self)))
1831
1833
1832 def revs(self, start=0, stop=None):
1834 def revs(self, start=0, stop=None):
1833 """iterate over all revs in this revlog (from start to stop)"""
1835 """iterate over all revs in this revlog (from start to stop)"""
1834 return storageutil.iterrevs(len(self), start=start, stop=stop)
1836 return storageutil.iterrevs(len(self), start=start, stop=stop)
1835
1837
1836 def hasnode(self, node):
1838 def hasnode(self, node):
1837 try:
1839 try:
1838 self.rev(node)
1840 self.rev(node)
1839 return True
1841 return True
1840 except KeyError:
1842 except KeyError:
1841 return False
1843 return False
1842
1844
1843 def _candelta(self, baserev, rev):
1845 def _candelta(self, baserev, rev):
1844 """whether two revisions (baserev, rev) can be delta-ed or not"""
1846 """whether two revisions (baserev, rev) can be delta-ed or not"""
1845 # Disable delta if either rev requires a content-changing flag
1847 # Disable delta if either rev requires a content-changing flag
1846 # processor (ex. LFS). This is because such a flag processor can alter
1848 # processor (ex. LFS). This is because such a flag processor can alter
1847 # the rawtext content that the delta will be based on, and two clients
1849 # the rawtext content that the delta will be based on, and two clients
1848 # could have the same revlog node with different flags (i.e. different
1850 # could have the same revlog node with different flags (i.e. different
1849 # rawtext contents) and the delta could be incompatible.
1851 # rawtext contents) and the delta could be incompatible.
1850 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
1852 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
1851 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
1853 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
1852 ):
1854 ):
1853 return False
1855 return False
1854 return True
1856 return True
1855
1857
1856 def update_caches(self, transaction):
1858 def update_caches(self, transaction):
1857 """update the on-disk cache
1859 """update the on-disk cache
1858
1860
1859 If a transaction is passed, the update may be delayed to transaction
1861 If a transaction is passed, the update may be delayed to transaction
1860 commit."""
1862 commit."""
1861 if self._nodemap_file is not None:
1863 if self._nodemap_file is not None:
1862 if transaction is None:
1864 if transaction is None:
1863 nodemaputil.update_persistent_nodemap(self)
1865 nodemaputil.update_persistent_nodemap(self)
1864 else:
1866 else:
1865 nodemaputil.setup_persistent_nodemap(transaction, self)
1867 nodemaputil.setup_persistent_nodemap(transaction, self)
1866
1868
1867 def clearcaches(self):
1869 def clearcaches(self):
1868 """Clear in-memory caches"""
1870 """Clear in-memory caches"""
1869 self._chainbasecache.clear()
1871 self._chainbasecache.clear()
1870 self._inner.clear_cache()
1872 self._inner.clear_cache()
1871 self._pcache = {}
1873 self._pcache = {}
1872 self._nodemap_docket = None
1874 self._nodemap_docket = None
1873 self.index.clearcaches()
1875 self.index.clearcaches()
1874 # The Python code is the one responsible for validating the docket, so
1876 # The Python code is the one responsible for validating the docket, so
1875 # we end up having to refresh it here.
1877 # we end up having to refresh it here.
1876 use_nodemap = (
1878 use_nodemap = (
1877 not self._inline
1879 not self._inline
1878 and self._nodemap_file is not None
1880 and self._nodemap_file is not None
1879 and hasattr(self.index, 'update_nodemap_data')
1881 and hasattr(self.index, 'update_nodemap_data')
1880 )
1882 )
1881 if use_nodemap:
1883 if use_nodemap:
1882 nodemap_data = nodemaputil.persisted_data(self)
1884 nodemap_data = nodemaputil.persisted_data(self)
1883 if nodemap_data is not None:
1885 if nodemap_data is not None:
1884 self._nodemap_docket = nodemap_data[0]
1886 self._nodemap_docket = nodemap_data[0]
1885 self.index.update_nodemap_data(*nodemap_data)
1887 self.index.update_nodemap_data(*nodemap_data)
1886
1888
1887 def rev(self, node):
1889 def rev(self, node):
1888 """return the revision number associated with a <nodeid>"""
1890 """return the revision number associated with a <nodeid>"""
1889 try:
1891 try:
1890 return self.index.rev(node)
1892 return self.index.rev(node)
1891 except TypeError:
1893 except TypeError:
1892 raise
1894 raise
1893 except error.RevlogError:
1895 except error.RevlogError:
1894 # parsers.c radix tree lookup failed
1896 # parsers.c radix tree lookup failed
1895 if (
1897 if (
1896 node == self.nodeconstants.wdirid
1898 node == self.nodeconstants.wdirid
1897 or node in self.nodeconstants.wdirfilenodeids
1899 or node in self.nodeconstants.wdirfilenodeids
1898 ):
1900 ):
1899 raise error.WdirUnsupported
1901 raise error.WdirUnsupported
1900 raise error.LookupError(node, self.display_id, _(b'no node'))
1902 raise error.LookupError(node, self.display_id, _(b'no node'))
1901
1903
1902 # Accessors for index entries.
1904 # Accessors for index entries.
1903
1905
1904 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
1906 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
1905 # are flags.
1907 # are flags.
1906 def start(self, rev):
1908 def start(self, rev):
1907 return int(self.index[rev][0] >> 16)
1909 return int(self.index[rev][0] >> 16)
1908
1910
1909 def sidedata_cut_off(self, rev):
1911 def sidedata_cut_off(self, rev):
1910 sd_cut_off = self.index[rev][8]
1912 sd_cut_off = self.index[rev][8]
1911 if sd_cut_off != 0:
1913 if sd_cut_off != 0:
1912 return sd_cut_off
1914 return sd_cut_off
1913 # This is some annoying dance, because entries without sidedata
1915 # This is some annoying dance, because entries without sidedata
1914 # currently use 0 as their offset. (instead of previous-offset +
1916 # currently use 0 as their offset. (instead of previous-offset +
1915 # previous-size)
1917 # previous-size)
1916 #
1918 #
1917 # We should reconsider this sidedata → 0 sidedata_offset policy.
1919 # We should reconsider this sidedata → 0 sidedata_offset policy.
1918 # In the meantime, we need this.
1920 # In the meantime, we need this.
1919 while 0 <= rev:
1921 while 0 <= rev:
1920 e = self.index[rev]
1922 e = self.index[rev]
1921 if e[9] != 0:
1923 if e[9] != 0:
1922 return e[8] + e[9]
1924 return e[8] + e[9]
1923 rev -= 1
1925 rev -= 1
1924 return 0
1926 return 0
1925
1927
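# --- Illustrative sketch (editorial, not part of revlog.py) ---
# A standalone rendering of the fallback scan above, over toy entries of
# (sidedata_offset, sidedata_size) pairs (fields 8 and 9 of a real index
# entry); entries without sidedata store offset 0, hence the backward walk:

def toy_sidedata_cut_off(entries, rev):
    if entries[rev][0] != 0:
        return entries[rev][0]
    while rev >= 0:
        offset, size = entries[rev]
        if size != 0:
            return offset + size
        rev -= 1
    return 0

# rev 2 has no sidedata, so the cut-off is rev 1's offset + size
assert toy_sidedata_cut_off([(0, 0), (32, 8), (0, 0)], 2) == 40
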
1926 def flags(self, rev):
1928 def flags(self, rev):
1927 return self.index[rev][0] & 0xFFFF
1929 return self.index[rev][0] & 0xFFFF
1928
1930
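# --- Illustrative sketch (editorial, not part of revlog.py) ---
# How start() and flags() unpack the first index field: 48 bits of data
# offset above 16 bits of flags.  The values below are made up:

def pack_offset_flags(offset, flags):
    assert 0 <= offset < 1 << 48 and 0 <= flags < 1 << 16
    return (offset << 16) | flags

packed = pack_offset_flags(4096, 0x0001)
assert packed >> 16 == 4096       # what start() computes
assert packed & 0xFFFF == 0x0001  # what flags() computes
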
1929 def length(self, rev):
1931 def length(self, rev):
1930 return self.index[rev][1]
1932 return self.index[rev][1]
1931
1933
1932 def sidedata_length(self, rev):
1934 def sidedata_length(self, rev):
1933 if not self.feature_config.has_side_data:
1935 if not self.feature_config.has_side_data:
1934 return 0
1936 return 0
1935 return self.index[rev][9]
1937 return self.index[rev][9]
1936
1938
1937 def rawsize(self, rev):
1939 def rawsize(self, rev):
1938 """return the length of the uncompressed text for a given revision"""
1940 """return the length of the uncompressed text for a given revision"""
1939 l = self.index[rev][2]
1941 l = self.index[rev][2]
1940 if l >= 0:
1942 if l >= 0:
1941 return l
1943 return l
1942
1944
1943 t = self.rawdata(rev)
1945 t = self.rawdata(rev)
1944 return len(t)
1946 return len(t)
1945
1947
1946 def size(self, rev):
1948 def size(self, rev):
1947 """length of non-raw text (processed by a "read" flag processor)"""
1949 """length of non-raw text (processed by a "read" flag processor)"""
1948 # fast path: if no "read" flag processor could change the content,
1950 # fast path: if no "read" flag processor could change the content,
1949 # size is rawsize. note: ELLIPSIS is known to not change the content.
1951 # size is rawsize. note: ELLIPSIS is known to not change the content.
1950 flags = self.flags(rev)
1952 flags = self.flags(rev)
1951 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
1953 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
1952 return self.rawsize(rev)
1954 return self.rawsize(rev)
1953
1955
1954 return len(self.revision(rev))
1956 return len(self.revision(rev))
1955
1957
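# --- Illustrative sketch (editorial, not part of revlog.py) ---
# The fast path in size() XORs ELLIPSIS out of the known-flags mask,
# leaving only the flags that may change the content.  With hypothetical
# flag values (not the real revlog constants):

KNOWN_FLAGS = 0b0111            # all known flags
ELLIPSIS_FLAG = 0b0100          # known not to change the content
content_changing = KNOWN_FLAGS ^ ELLIPSIS_FLAG  # 0b0011

assert ELLIPSIS_FLAG & content_changing == 0  # ellipsis alone: fast path
assert 0b0001 & content_changing != 0         # any other flag: slow path
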
1956 def fast_rank(self, rev):
1958 def fast_rank(self, rev):
1957 """Return the rank of a revision if already known, or None otherwise.
1959 """Return the rank of a revision if already known, or None otherwise.
1958
1960
1959 The rank of a revision is the size of the sub-graph it defines as a
1961 The rank of a revision is the size of the sub-graph it defines as a
1960 head. Equivalently, the rank of a revision `r` is the size of the set
1962 head. Equivalently, the rank of a revision `r` is the size of the set
1961 `ancestors(r)`, `r` included.
1963 `ancestors(r)`, `r` included.
1962
1964
1963 This method returns the rank retrieved from the revlog in constant
1965 This method returns the rank retrieved from the revlog in constant
1964 time. It makes no attempt at computing unknown values for versions of
1966 time. It makes no attempt at computing unknown values for versions of
1965 the revlog which do not persist the rank.
1967 the revlog which do not persist the rank.
1966 """
1968 """
1967 rank = self.index[rev][ENTRY_RANK]
1969 rank = self.index[rev][ENTRY_RANK]
1968 if self._format_version != CHANGELOGV2 or rank == RANK_UNKNOWN:
1970 if self._format_version != CHANGELOGV2 or rank == RANK_UNKNOWN:
1969 return None
1971 return None
1970 if rev == nullrev:
1972 if rev == nullrev:
1971 return 0 # convention
1973 return 0 # convention
1972 return rank
1974 return rank
1973
1975
1974 def chainbase(self, rev):
1976 def chainbase(self, rev):
1975 base = self._chainbasecache.get(rev)
1977 base = self._chainbasecache.get(rev)
1976 if base is not None:
1978 if base is not None:
1977 return base
1979 return base
1978
1980
1979 index = self.index
1981 index = self.index
1980 iterrev = rev
1982 iterrev = rev
1981 base = index[iterrev][3]
1983 base = index[iterrev][3]
1982 while base != iterrev:
1984 while base != iterrev:
1983 iterrev = base
1985 iterrev = base
1984 base = index[iterrev][3]
1986 base = index[iterrev][3]
1985
1987
1986 self._chainbasecache[rev] = base
1988 self._chainbasecache[rev] = base
1987 return base
1989 return base
1988
1990
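# --- Illustrative sketch (editorial, not part of revlog.py) ---
# The walk in chainbase(): field 3 of an index entry is the delta base,
# and a revision whose base is itself starts a new chain.  Toy bases:
# revs 0 and 3 are full snapshots, 1 and 2 delta onto their predecessor.

def toy_chainbase(bases, rev):
    while bases[rev] != rev:
        rev = bases[rev]
    return rev

bases = [0, 0, 1, 3]
assert toy_chainbase(bases, 2) == 0
assert toy_chainbase(bases, 3) == 3
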
1989 def linkrev(self, rev):
1991 def linkrev(self, rev):
1990 return self.index[rev][4]
1992 return self.index[rev][4]
1991
1993
1992 def parentrevs(self, rev):
1994 def parentrevs(self, rev):
1993 try:
1995 try:
1994 entry = self.index[rev]
1996 entry = self.index[rev]
1995 except IndexError:
1997 except IndexError:
1996 if rev == wdirrev:
1998 if rev == wdirrev:
1997 raise error.WdirUnsupported
1999 raise error.WdirUnsupported
1998 raise
2000 raise
1999
2001
2000 if self.feature_config.canonical_parent_order and entry[5] == nullrev:
2002 if self.feature_config.canonical_parent_order and entry[5] == nullrev:
2001 return entry[6], entry[5]
2003 return entry[6], entry[5]
2002 else:
2004 else:
2003 return entry[5], entry[6]
2005 return entry[5], entry[6]
2004
2006
2005 # fast parentrevs(rev) where rev isn't filtered
2007 # fast parentrevs(rev) where rev isn't filtered
2006 _uncheckedparentrevs = parentrevs
2008 _uncheckedparentrevs = parentrevs
2007
2009
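# --- Illustrative sketch (editorial, not part of revlog.py) ---
# The canonical-parent-order branch in parentrevs(): when the feature is
# enabled and the stored p1 (field 5) is null, the parents are swapped
# so the null parent is reported second:

NULLREV = -1

def ordered_parents(p1, p2, canonical=True):
    if canonical and p1 == NULLREV:
        return p2, p1
    return p1, p2

assert ordered_parents(NULLREV, 4) == (4, NULLREV)
assert ordered_parents(3, 4) == (3, 4)
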
2008 def node(self, rev):
2010 def node(self, rev):
2009 try:
2011 try:
2010 return self.index[rev][7]
2012 return self.index[rev][7]
2011 except IndexError:
2013 except IndexError:
2012 if rev == wdirrev:
2014 if rev == wdirrev:
2013 raise error.WdirUnsupported
2015 raise error.WdirUnsupported
2014 raise
2016 raise
2015
2017
2016 # Derived from index values.
2018 # Derived from index values.
2017
2019
2018 def end(self, rev):
2020 def end(self, rev):
2019 return self.start(rev) + self.length(rev)
2021 return self.start(rev) + self.length(rev)
2020
2022
2021 def parents(self, node):
2023 def parents(self, node):
2022 i = self.index
2024 i = self.index
2023 d = i[self.rev(node)]
2025 d = i[self.rev(node)]
2024 # inline node() to avoid function call overhead
2026 # inline node() to avoid function call overhead
2025 if self.feature_config.canonical_parent_order and d[5] == self.nullid:
2027 if self.feature_config.canonical_parent_order and d[5] == self.nullid:
2026 return i[d[6]][7], i[d[5]][7]
2028 return i[d[6]][7], i[d[5]][7]
2027 else:
2029 else:
2028 return i[d[5]][7], i[d[6]][7]
2030 return i[d[5]][7], i[d[6]][7]
2029
2031
2030 def chainlen(self, rev):
2032 def chainlen(self, rev):
2031 return self._chaininfo(rev)[0]
2033 return self._chaininfo(rev)[0]
2032
2034
2033 def _chaininfo(self, rev):
2035 def _chaininfo(self, rev):
2034 chaininfocache = self._chaininfocache
2036 chaininfocache = self._chaininfocache
2035 if rev in chaininfocache:
2037 if rev in chaininfocache:
2036 return chaininfocache[rev]
2038 return chaininfocache[rev]
2037 index = self.index
2039 index = self.index
2038 generaldelta = self.delta_config.general_delta
2040 generaldelta = self.delta_config.general_delta
2039 iterrev = rev
2041 iterrev = rev
2040 e = index[iterrev]
2042 e = index[iterrev]
2041 clen = 0
2043 clen = 0
2042 compresseddeltalen = 0
2044 compresseddeltalen = 0
2043 while iterrev != e[3]:
2045 while iterrev != e[3]:
2044 clen += 1
2046 clen += 1
2045 compresseddeltalen += e[1]
2047 compresseddeltalen += e[1]
2046 if generaldelta:
2048 if generaldelta:
2047 iterrev = e[3]
2049 iterrev = e[3]
2048 else:
2050 else:
2049 iterrev -= 1
2051 iterrev -= 1
2050 if iterrev in chaininfocache:
2052 if iterrev in chaininfocache:
2051 t = chaininfocache[iterrev]
2053 t = chaininfocache[iterrev]
2052 clen += t[0]
2054 clen += t[0]
2053 compresseddeltalen += t[1]
2055 compresseddeltalen += t[1]
2054 break
2056 break
2055 e = index[iterrev]
2057 e = index[iterrev]
2056 else:
2058 else:
2057 # Add text length of base since decompressing that also takes
2059 # Add text length of base since decompressing that also takes
2058 # work. For cache hits the length is already included.
2060 # work. For cache hits the length is already included.
2059 compresseddeltalen += e[1]
2061 compresseddeltalen += e[1]
2060 r = (clen, compresseddeltalen)
2062 r = (clen, compresseddeltalen)
2061 chaininfocache[rev] = r
2063 chaininfocache[rev] = r
2062 return r
2064 return r
2063
2065
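# --- Illustrative sketch (editorial, not part of revlog.py) ---
# The accounting done by _chaininfo(), minus the cache: walk the chain,
# summing the stored (compressed) delta lengths (field 1), then add the
# base text's length since decompressing it also costs work:

def toy_chaininfo(bases, lengths, rev):
    clen = compressed = 0
    while bases[rev] != rev:
        clen += 1
        compressed += lengths[rev]
        rev = bases[rev]
    compressed += lengths[rev]  # cost of the chain's base
    return clen, compressed

assert toy_chaininfo([0, 0, 1], [100, 20, 30], 2) == (2, 150)
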
2064 def _deltachain(self, rev, stoprev=None):
2066 def _deltachain(self, rev, stoprev=None):
2065 return self._inner._deltachain(rev, stoprev=stoprev)
2067 return self._inner._deltachain(rev, stoprev=stoprev)
2066
2068
2067 def ancestors(self, revs, stoprev=0, inclusive=False):
2069 def ancestors(self, revs, stoprev=0, inclusive=False):
2068 """Generate the ancestors of 'revs' in reverse revision order.
2070 """Generate the ancestors of 'revs' in reverse revision order.
2069 Does not generate revs lower than stoprev.
2071 Does not generate revs lower than stoprev.
2070
2072
2071 See the documentation for ancestor.lazyancestors for more details."""
2073 See the documentation for ancestor.lazyancestors for more details."""
2072
2074
2073 # first, make sure start revisions aren't filtered
2075 # first, make sure start revisions aren't filtered
2074 revs = list(revs)
2076 revs = list(revs)
2075 checkrev = self.node
2077 checkrev = self.node
2076 for r in revs:
2078 for r in revs:
2077 checkrev(r)
2079 checkrev(r)
2078 # and we're sure ancestors aren't filtered as well
2080 # and we're sure ancestors aren't filtered as well
2079
2081
2080 if rustancestor is not None and self.index.rust_ext_compat:
2082 if rustancestor is not None and self.index.rust_ext_compat:
2081 lazyancestors = rustancestor.LazyAncestors
2083 lazyancestors = rustancestor.LazyAncestors
2082 arg = self.index
2084 arg = self.index
2083 else:
2085 else:
2084 lazyancestors = ancestor.lazyancestors
2086 lazyancestors = ancestor.lazyancestors
2085 arg = self._uncheckedparentrevs
2087 arg = self._uncheckedparentrevs
2086 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
2088 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
2087
2089
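# --- Illustrative sketch (editorial, not part of revlog.py) ---
# What a lazyancestors object yields, as an eager plain-Python
# approximation (ignoring laziness and generation order): ancestors of
# revs, at or above stoprev, starting from the revs themselves when
# inclusive, otherwise from their parents.

def toy_ancestors(parentrevs, revs, stoprev=0, inclusive=False):
    visit = list(revs) if inclusive else [
        p for r in revs for p in parentrevs(r)]
    seen = set()
    while visit:
        r = visit.pop()
        if r >= stoprev and r not in seen:
            seen.add(r)
            visit.extend(parentrevs(r))
    return sorted(seen, reverse=True)  # reverse revision order

parents = {0: (-1, -1), 1: (0, -1), 2: (0, -1), 3: (1, 2)}
assert toy_ancestors(parents.__getitem__, [3]) == [2, 1, 0]
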
2088 def descendants(self, revs):
2090 def descendants(self, revs):
2089 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
2091 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
2090
2092
2091 def findcommonmissing(self, common=None, heads=None):
2093 def findcommonmissing(self, common=None, heads=None):
2092 """Return a tuple of the ancestors of common and the ancestors of heads
2094 """Return a tuple of the ancestors of common and the ancestors of heads
2093 that are not ancestors of common. In revset terminology, we return the
2095 that are not ancestors of common. In revset terminology, we return the
2094 tuple:
2096 tuple:
2095
2097
2096 ::common, (::heads) - (::common)
2098 ::common, (::heads) - (::common)
2097
2099
2098 The list is sorted by revision number, meaning it is
2100 The list is sorted by revision number, meaning it is
2099 topologically sorted.
2101 topologically sorted.
2100
2102
2101 'heads' and 'common' are both lists of node IDs. If heads is
2103 'heads' and 'common' are both lists of node IDs. If heads is
2102 not supplied, uses all of the revlog's heads. If common is not
2104 not supplied, uses all of the revlog's heads. If common is not
2103 supplied, uses nullid."""
2105 supplied, uses nullid."""
2104 if common is None:
2106 if common is None:
2105 common = [self.nullid]
2107 common = [self.nullid]
2106 if heads is None:
2108 if heads is None:
2107 heads = self.heads()
2109 heads = self.heads()
2108
2110
2109 common = [self.rev(n) for n in common]
2111 common = [self.rev(n) for n in common]
2110 heads = [self.rev(n) for n in heads]
2112 heads = [self.rev(n) for n in heads]
2111
2113
2112 # we want the ancestors, but inclusive
2114 # we want the ancestors, but inclusive
2113 class lazyset:
2115 class lazyset:
2114 def __init__(self, lazyvalues):
2116 def __init__(self, lazyvalues):
2115 self.addedvalues = set()
2117 self.addedvalues = set()
2116 self.lazyvalues = lazyvalues
2118 self.lazyvalues = lazyvalues
2117
2119
2118 def __contains__(self, value):
2120 def __contains__(self, value):
2119 return value in self.addedvalues or value in self.lazyvalues
2121 return value in self.addedvalues or value in self.lazyvalues
2120
2122
2121 def __iter__(self):
2123 def __iter__(self):
2122 added = self.addedvalues
2124 added = self.addedvalues
2123 for r in added:
2125 for r in added:
2124 yield r
2126 yield r
2125 for r in self.lazyvalues:
2127 for r in self.lazyvalues:
2126 if r not in added:
2128 if r not in added:
2127 yield r
2129 yield r
2128
2130
2129 def add(self, value):
2131 def add(self, value):
2130 self.addedvalues.add(value)
2132 self.addedvalues.add(value)
2131
2133
2132 def update(self, values):
2134 def update(self, values):
2133 self.addedvalues.update(values)
2135 self.addedvalues.update(values)
2134
2136
2135 has = lazyset(self.ancestors(common))
2137 has = lazyset(self.ancestors(common))
2136 has.add(nullrev)
2138 has.add(nullrev)
2137 has.update(common)
2139 has.update(common)
2138
2140
2139 # take all ancestors from heads that aren't in has
2141 # take all ancestors from heads that aren't in has
2140 missing = set()
2142 missing = set()
2141 visit = collections.deque(r for r in heads if r not in has)
2143 visit = collections.deque(r for r in heads if r not in has)
2142 while visit:
2144 while visit:
2143 r = visit.popleft()
2145 r = visit.popleft()
2144 if r in missing:
2146 if r in missing:
2145 continue
2147 continue
2146 else:
2148 else:
2147 missing.add(r)
2149 missing.add(r)
2148 for p in self.parentrevs(r):
2150 for p in self.parentrevs(r):
2149 if p not in has:
2151 if p not in has:
2150 visit.append(p)
2152 visit.append(p)
2151 missing = list(missing)
2153 missing = list(missing)
2152 missing.sort()
2154 missing.sort()
2153 return has, [self.node(miss) for miss in missing]
2155 return has, [self.node(miss) for miss in missing]
2154
2156
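# --- Illustrative sketch (editorial, not part of revlog.py) ---
# The docstring's revset identity on a toy DAG (0 <- 1 <- 2, 1 <- 3),
# with common=[1] and heads=[2, 3]: "has" is ::1 and "missing" is
# (::2 + ::3) - ::1, sorted by revision number:

ancestors_inclusive = {0: {0}, 1: {0, 1}, 2: {0, 1, 2}, 3: {0, 1, 3}}
has = ancestors_inclusive[1]                       # ::common
missing = sorted(
    (ancestors_inclusive[2] | ancestors_inclusive[3]) - has)
assert missing == [2, 3]
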
2155 def incrementalmissingrevs(self, common=None):
2157 def incrementalmissingrevs(self, common=None):
2156 """Return an object that can be used to incrementally compute the
2158 """Return an object that can be used to incrementally compute the
2157 revision numbers of the ancestors of arbitrary sets that are not
2159 revision numbers of the ancestors of arbitrary sets that are not
2158 ancestors of common. This is an ancestor.incrementalmissingancestors
2160 ancestors of common. This is an ancestor.incrementalmissingancestors
2159 object.
2161 object.
2160
2162
2161 'common' is a list of revision numbers. If common is not supplied, uses
2163 'common' is a list of revision numbers. If common is not supplied, uses
2162 nullrev.
2164 nullrev.
2163 """
2165 """
2164 if common is None:
2166 if common is None:
2165 common = [nullrev]
2167 common = [nullrev]
2166
2168
2167 if rustancestor is not None and self.index.rust_ext_compat:
2169 if rustancestor is not None and self.index.rust_ext_compat:
2168 return rustancestor.MissingAncestors(self.index, common)
2170 return rustancestor.MissingAncestors(self.index, common)
2169 return ancestor.incrementalmissingancestors(self.parentrevs, common)
2171 return ancestor.incrementalmissingancestors(self.parentrevs, common)
2170
2172
2171 def findmissingrevs(self, common=None, heads=None):
2173 def findmissingrevs(self, common=None, heads=None):
2172 """Return the revision numbers of the ancestors of heads that
2174 """Return the revision numbers of the ancestors of heads that
2173 are not ancestors of common.
2175 are not ancestors of common.
2174
2176
2175 More specifically, return a list of revision numbers corresponding to
2177 More specifically, return a list of revision numbers corresponding to
2176 nodes N such that every N satisfies the following constraints:
2178 nodes N such that every N satisfies the following constraints:
2177
2179
2178 1. N is an ancestor of some node in 'heads'
2180 1. N is an ancestor of some node in 'heads'
2179 2. N is not an ancestor of any node in 'common'
2181 2. N is not an ancestor of any node in 'common'
2180
2182
2181 The list is sorted by revision number, meaning it is
2183 The list is sorted by revision number, meaning it is
2182 topologically sorted.
2184 topologically sorted.
2183
2185
2184 'heads' and 'common' are both lists of revision numbers. If heads is
2186 'heads' and 'common' are both lists of revision numbers. If heads is
2185 not supplied, uses all of the revlog's heads. If common is not
2187 not supplied, uses all of the revlog's heads. If common is not
2186 supplied, uses nullrev."""
2188 supplied, uses nullrev."""
2187 if common is None:
2189 if common is None:
2188 common = [nullrev]
2190 common = [nullrev]
2189 if heads is None:
2191 if heads is None:
2190 heads = self.headrevs()
2192 heads = self.headrevs()
2191
2193
2192 inc = self.incrementalmissingrevs(common=common)
2194 inc = self.incrementalmissingrevs(common=common)
2193 return inc.missingancestors(heads)
2195 return inc.missingancestors(heads)
2194
2196
2195 def findmissing(self, common=None, heads=None):
2197 def findmissing(self, common=None, heads=None):
2196 """Return the ancestors of heads that are not ancestors of common.
2198 """Return the ancestors of heads that are not ancestors of common.
2197
2199
2198 More specifically, return a list of nodes N such that every N
2200 More specifically, return a list of nodes N such that every N
2199 satisfies the following constraints:
2201 satisfies the following constraints:
2200
2202
2201 1. N is an ancestor of some node in 'heads'
2203 1. N is an ancestor of some node in 'heads'
2202 2. N is not an ancestor of any node in 'common'
2204 2. N is not an ancestor of any node in 'common'
2203
2205
2204 The list is sorted by revision number, meaning it is
2206 The list is sorted by revision number, meaning it is
2205 topologically sorted.
2207 topologically sorted.
2206
2208
2207 'heads' and 'common' are both lists of node IDs. If heads is
2209 'heads' and 'common' are both lists of node IDs. If heads is
2208 not supplied, uses all of the revlog's heads. If common is not
2210 not supplied, uses all of the revlog's heads. If common is not
2209 supplied, uses nullid."""
2211 supplied, uses nullid."""
2210 if common is None:
2212 if common is None:
2211 common = [self.nullid]
2213 common = [self.nullid]
2212 if heads is None:
2214 if heads is None:
2213 heads = self.heads()
2215 heads = self.heads()
2214
2216
2215 common = [self.rev(n) for n in common]
2217 common = [self.rev(n) for n in common]
2216 heads = [self.rev(n) for n in heads]
2218 heads = [self.rev(n) for n in heads]
2217
2219
2218 inc = self.incrementalmissingrevs(common=common)
2220 inc = self.incrementalmissingrevs(common=common)
2219 return [self.node(r) for r in inc.missingancestors(heads)]
2221 return [self.node(r) for r in inc.missingancestors(heads)]
2220
2222
2221 def nodesbetween(self, roots=None, heads=None):
2223 def nodesbetween(self, roots=None, heads=None):
2222 """Return a topological path from 'roots' to 'heads'.
2224 """Return a topological path from 'roots' to 'heads'.
2223
2225
2224 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
2226 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
2225 topologically sorted list of all nodes N that satisfy both of
2227 topologically sorted list of all nodes N that satisfy both of
2226 these constraints:
2228 these constraints:
2227
2229
2228 1. N is a descendant of some node in 'roots'
2230 1. N is a descendant of some node in 'roots'
2229 2. N is an ancestor of some node in 'heads'
2231 2. N is an ancestor of some node in 'heads'
2230
2232
2231 Every node is considered to be both a descendant and an ancestor
2233 Every node is considered to be both a descendant and an ancestor
2232 of itself, so every reachable node in 'roots' and 'heads' will be
2234 of itself, so every reachable node in 'roots' and 'heads' will be
2233 included in 'nodes'.
2235 included in 'nodes'.
2234
2236
2235 'outroots' is the list of reachable nodes in 'roots', i.e., the
2237 'outroots' is the list of reachable nodes in 'roots', i.e., the
2236 subset of 'roots' that is returned in 'nodes'. Likewise,
2238 subset of 'roots' that is returned in 'nodes'. Likewise,
2237 'outheads' is the subset of 'heads' that is also in 'nodes'.
2239 'outheads' is the subset of 'heads' that is also in 'nodes'.
2238
2240
2239 'roots' and 'heads' are both lists of node IDs. If 'roots' is
2241 'roots' and 'heads' are both lists of node IDs. If 'roots' is
2240 unspecified, uses nullid as the only root. If 'heads' is
2242 unspecified, uses nullid as the only root. If 'heads' is
2241 unspecified, uses list of all of the revlog's heads."""
2243 unspecified, uses list of all of the revlog's heads."""
2242 nonodes = ([], [], [])
2244 nonodes = ([], [], [])
2243 if roots is not None:
2245 if roots is not None:
2244 roots = list(roots)
2246 roots = list(roots)
2245 if not roots:
2247 if not roots:
2246 return nonodes
2248 return nonodes
2247 lowestrev = min([self.rev(n) for n in roots])
2249 lowestrev = min([self.rev(n) for n in roots])
2248 else:
2250 else:
2249 roots = [self.nullid] # Everybody's a descendant of nullid
2251 roots = [self.nullid] # Everybody's a descendant of nullid
2250 lowestrev = nullrev
2252 lowestrev = nullrev
2251 if (lowestrev == nullrev) and (heads is None):
2253 if (lowestrev == nullrev) and (heads is None):
2252 # We want _all_ the nodes!
2254 # We want _all_ the nodes!
2253 return (
2255 return (
2254 [self.node(r) for r in self],
2256 [self.node(r) for r in self],
2255 [self.nullid],
2257 [self.nullid],
2256 list(self.heads()),
2258 list(self.heads()),
2257 )
2259 )
2258 if heads is None:
2260 if heads is None:
2259 # All nodes are ancestors, so the latest ancestor is the last
2261 # All nodes are ancestors, so the latest ancestor is the last
2260 # node.
2262 # node.
2261 highestrev = len(self) - 1
2263 highestrev = len(self) - 1
2262 # Set ancestors to None to signal that every node is an ancestor.
2264 # Set ancestors to None to signal that every node is an ancestor.
2263 ancestors = None
2265 ancestors = None
2264 # Set heads to an empty dictionary for later discovery of heads
2266 # Set heads to an empty dictionary for later discovery of heads
2265 heads = {}
2267 heads = {}
2266 else:
2268 else:
2267 heads = list(heads)
2269 heads = list(heads)
2268 if not heads:
2270 if not heads:
2269 return nonodes
2271 return nonodes
2270 ancestors = set()
2272 ancestors = set()
2271 # Turn heads into a dictionary so we can remove 'fake' heads.
2273 # Turn heads into a dictionary so we can remove 'fake' heads.
2272 # Also, later we will be using it to filter out the heads we can't
2274 # Also, later we will be using it to filter out the heads we can't
2273 # find from roots.
2275 # find from roots.
2274 heads = dict.fromkeys(heads, False)
2276 heads = dict.fromkeys(heads, False)
2275 # Start at the top and keep marking parents until we're done.
2277 # Start at the top and keep marking parents until we're done.
2276 nodestotag = set(heads)
2278 nodestotag = set(heads)
2277 # Remember where the top was so we can use it as a limit later.
2279 # Remember where the top was so we can use it as a limit later.
2278 highestrev = max([self.rev(n) for n in nodestotag])
2280 highestrev = max([self.rev(n) for n in nodestotag])
2279 while nodestotag:
2281 while nodestotag:
2280 # grab a node to tag
2282 # grab a node to tag
2281 n = nodestotag.pop()
2283 n = nodestotag.pop()
2282 # Never tag nullid
2284 # Never tag nullid
2283 if n == self.nullid:
2285 if n == self.nullid:
2284 continue
2286 continue
2285 # A node's revision number represents its place in a
2287 # A node's revision number represents its place in a
2286 # topologically sorted list of nodes.
2288 # topologically sorted list of nodes.
2287 r = self.rev(n)
2289 r = self.rev(n)
2288 if r >= lowestrev:
2290 if r >= lowestrev:
2289 if n not in ancestors:
2291 if n not in ancestors:
2290 # If we are possibly a descendant of one of the roots
2292 # If we are possibly a descendant of one of the roots
2291 # and we haven't already been marked as an ancestor
2293 # and we haven't already been marked as an ancestor
2292 ancestors.add(n) # Mark as ancestor
2294 ancestors.add(n) # Mark as ancestor
2293 # Add non-nullid parents to list of nodes to tag.
2295 # Add non-nullid parents to list of nodes to tag.
2294 nodestotag.update(
2296 nodestotag.update(
2295 [p for p in self.parents(n) if p != self.nullid]
2297 [p for p in self.parents(n) if p != self.nullid]
2296 )
2298 )
2297 elif n in heads: # We've seen it before, is it a fake head?
2299 elif n in heads: # We've seen it before, is it a fake head?
2298 # So it is, real heads should not be the ancestors of
2300 # So it is, real heads should not be the ancestors of
2299 # any other heads.
2301 # any other heads.
2300 heads.pop(n)
2302 heads.pop(n)
2301 if not ancestors:
2303 if not ancestors:
2302 return nonodes
2304 return nonodes
2303 # Now that we have our set of ancestors, we want to remove any
2305 # Now that we have our set of ancestors, we want to remove any
2304 # roots that are not ancestors.
2306 # roots that are not ancestors.
2305
2307
2306 # If one of the roots was nullid, everything is included anyway.
2308 # If one of the roots was nullid, everything is included anyway.
2307 if lowestrev > nullrev:
2309 if lowestrev > nullrev:
2308 # But, since we weren't, let's recompute the lowest rev to not
2310 # But, since we weren't, let's recompute the lowest rev to not
2309 # include roots that aren't ancestors.
2311 # include roots that aren't ancestors.
2310
2312
2311 # Filter out roots that aren't ancestors of heads
2313 # Filter out roots that aren't ancestors of heads
2312 roots = [root for root in roots if root in ancestors]
2314 roots = [root for root in roots if root in ancestors]
2313 # Recompute the lowest revision
2315 # Recompute the lowest revision
2314 if roots:
2316 if roots:
2315 lowestrev = min([self.rev(root) for root in roots])
2317 lowestrev = min([self.rev(root) for root in roots])
2316 else:
2318 else:
2317 # No more roots? Return empty list
2319 # No more roots? Return empty list
2318 return nonodes
2320 return nonodes
2319 else:
2321 else:
2320 # We are descending from nullid, and don't need to care about
2322 # We are descending from nullid, and don't need to care about
2321 # any other roots.
2323 # any other roots.
2322 lowestrev = nullrev
2324 lowestrev = nullrev
2323 roots = [self.nullid]
2325 roots = [self.nullid]
2324 # Transform our roots list into a set.
2326 # Transform our roots list into a set.
2325 descendants = set(roots)
2327 descendants = set(roots)
2326 # Also, keep the original roots so we can filter out roots that aren't
2328 # Also, keep the original roots so we can filter out roots that aren't
2327 # 'real' roots (i.e. are descended from other roots).
2329 # 'real' roots (i.e. are descended from other roots).
2328 roots = descendants.copy()
2330 roots = descendants.copy()
2329 # Our topologically sorted list of output nodes.
2331 # Our topologically sorted list of output nodes.
2330 orderedout = []
2332 orderedout = []
2331 # Don't start at nullid since we don't want nullid in our output list,
2333 # Don't start at nullid since we don't want nullid in our output list,
2332 # and if nullid shows up in descendants, empty parents will look like
2334 # and if nullid shows up in descendants, empty parents will look like
2333 # they're descendants.
2335 # they're descendants.
2334 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
2336 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
2335 n = self.node(r)
2337 n = self.node(r)
2336 isdescendant = False
2338 isdescendant = False
2337 if lowestrev == nullrev: # Everybody is a descendant of nullid
2339 if lowestrev == nullrev: # Everybody is a descendant of nullid
2338 isdescendant = True
2340 isdescendant = True
2339 elif n in descendants:
2341 elif n in descendants:
2340 # n is already a descendant
2342 # n is already a descendant
2341 isdescendant = True
2343 isdescendant = True
2342 # This check only needs to be done here because all the roots
2344 # This check only needs to be done here because all the roots
2343 # will start being marked as descendants before the loop.
2345 # will start being marked as descendants before the loop.
2344 if n in roots:
2346 if n in roots:
2345 # If n was a root, check if it's a 'real' root.
2347 # If n was a root, check if it's a 'real' root.
2346 p = tuple(self.parents(n))
2348 p = tuple(self.parents(n))
2347 # If any of its parents are descendants, it's not a root.
2349 # If any of its parents are descendants, it's not a root.
2348 if (p[0] in descendants) or (p[1] in descendants):
2350 if (p[0] in descendants) or (p[1] in descendants):
2349 roots.remove(n)
2351 roots.remove(n)
2350 else:
2352 else:
2351 p = tuple(self.parents(n))
2353 p = tuple(self.parents(n))
2352 # A node is a descendant if either of its parents is a
2354 # A node is a descendant if either of its parents is a
2353 # descendant. (We seeded the descendants set with the roots
2355 # descendant. (We seeded the descendants set with the roots
2354 # up there, remember?)
2356 # up there, remember?)
2355 if (p[0] in descendants) or (p[1] in descendants):
2357 if (p[0] in descendants) or (p[1] in descendants):
2356 descendants.add(n)
2358 descendants.add(n)
2357 isdescendant = True
2359 isdescendant = True
2358 if isdescendant and ((ancestors is None) or (n in ancestors)):
2360 if isdescendant and ((ancestors is None) or (n in ancestors)):
2359 # Only include nodes that are both descendants and ancestors.
2361 # Only include nodes that are both descendants and ancestors.
2360 orderedout.append(n)
2362 orderedout.append(n)
2361 if (ancestors is not None) and (n in heads):
2363 if (ancestors is not None) and (n in heads):
2362 # We're trying to figure out which heads are reachable
2364 # We're trying to figure out which heads are reachable
2363 # from roots.
2365 # from roots.
2364 # Mark this head as having been reached
2366 # Mark this head as having been reached
2365 heads[n] = True
2367 heads[n] = True
2366 elif ancestors is None:
2368 elif ancestors is None:
2367 # Otherwise, we're trying to discover the heads.
2369 # Otherwise, we're trying to discover the heads.
2368 # Assume this is a head because if it isn't, the next step
2370 # Assume this is a head because if it isn't, the next step
2369 # will eventually remove it.
2371 # will eventually remove it.
2370 heads[n] = True
2372 heads[n] = True
2371 # But, obviously its parents aren't.
2373 # But, obviously its parents aren't.
2372 for p in self.parents(n):
2374 for p in self.parents(n):
2373 heads.pop(p, None)
2375 heads.pop(p, None)
2374 heads = [head for head, flag in heads.items() if flag]
2376 heads = [head for head, flag in heads.items() if flag]
2375 roots = list(roots)
2377 roots = list(roots)
2376 assert orderedout
2378 assert orderedout
2377 assert roots
2379 assert roots
2378 assert heads
2380 assert heads
2379 return (orderedout, roots, heads)
2381 return (orderedout, roots, heads)
2380
2382
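# --- Illustrative sketch (editorial, not part of revlog.py) ---
# The nodesbetween() contract on a toy DAG (0 <- 1 <- 2, 0 <- 3), using
# revision numbers to stand in for node IDs.  With roots=[1] and
# heads=[2, 3]: nodes must descend from a root and be an ancestor of a
# head, so nodes == [1, 2], outroots == [1], and outheads == [2]
# (head 3 is not reachable from any root).

descendants_of_roots = {1, 2}                  # 1::
ancestors_of_heads = {0, 1, 2} | {0, 3}        # ::2 + ::3
nodes = sorted(descendants_of_roots & ancestors_of_heads)
assert nodes == [1, 2]
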
2381 def headrevs(self, revs=None):
2383 def headrevs(self, revs=None):
2382 if revs is None:
2384 if revs is None:
2383 try:
2385 try:
2384 return self.index.headrevs()
2386 return self.index.headrevs()
2385 except AttributeError:
2387 except AttributeError:
2386 return self._headrevs()
2388 return self._headrevs()
2387 if rustdagop is not None and self.index.rust_ext_compat:
2389 if rustdagop is not None and self.index.rust_ext_compat:
2388 return rustdagop.headrevs(self.index, revs)
2390 return rustdagop.headrevs(self.index, revs)
2389 return dagop.headrevs(revs, self._uncheckedparentrevs)
2391 return dagop.headrevs(revs, self._uncheckedparentrevs)
2390
2392
2391 def headrevsdiff(self, start, stop):
2393 def headrevsdiff(self, start, stop):
2392 try:
2394 try:
2393 return self.index.headrevsdiff(start, stop)
2395 return self.index.headrevsdiff(start, stop)
2394 except AttributeError:
2396 except AttributeError:
2395 return dagop.headrevsdiff(self._uncheckedparentrevs, start, stop)
2397 return dagop.headrevsdiff(self._uncheckedparentrevs, start, stop)
2396
2398
2397 def computephases(self, roots):
2399 def computephases(self, roots):
2398 return self.index.computephasesmapsets(roots)
2400 return self.index.computephasesmapsets(roots)
2399
2401
2400 def _headrevs(self):
2402 def _headrevs(self):
2401 count = len(self)
2403 count = len(self)
2402 if not count:
2404 if not count:
2403 return [nullrev]
2405 return [nullrev]
2404 # we won't iterate over filtered revs, so nobody is a head at the start
2406 # we won't iterate over filtered revs, so nobody is a head at the start
2405 ishead = [0] * (count + 1)
2407 ishead = [0] * (count + 1)
2406 index = self.index
2408 index = self.index
2407 for r in self:
2409 for r in self:
2408 ishead[r] = 1 # I may be a head
2410 ishead[r] = 1 # I may be a head
2409 e = index[r]
2411 e = index[r]
2410 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
2412 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
2411 return [r for r, val in enumerate(ishead) if val]
2413 return [r for r, val in enumerate(ishead) if val]
2412
2414
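# --- Illustrative sketch (editorial, not part of revlog.py) ---
# The marking pass of _headrevs(): every rev starts as a candidate head
# and is cleared as soon as it appears as a parent.  (The real code
# sizes the array count + 1 so writes at nullrev, i.e. index -1, land
# harmlessly in the extra slot; the same trick is used below.)

def toy_headrevs(parents):
    ishead = [1] * (len(parents) + 1)
    for r, (p1, p2) in enumerate(parents):
        ishead[p1] = ishead[p2] = 0
    return [r for r in range(len(parents)) if ishead[r]]

# 0 <- 1 <- 2 and 1 <- 3: the heads are 2 and 3
assert toy_headrevs([(-1, -1), (0, -1), (1, -1), (1, -1)]) == [2, 3]
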
2413 def _head_node_ids(self):
2415 def _head_node_ids(self):
2414 try:
2416 try:
2415 return self.index.head_node_ids()
2417 return self.index.head_node_ids()
2416 except AttributeError:
2418 except AttributeError:
2417 return [self.node(r) for r in self.headrevs()]
2419 return [self.node(r) for r in self.headrevs()]
2418
2420
2419 def heads(self, start=None, stop=None):
2421 def heads(self, start=None, stop=None):
2420 """return the list of all nodes that have no children
2422 """return the list of all nodes that have no children
2421
2423
2422 if start is specified, only heads that are descendants of
2424 if start is specified, only heads that are descendants of
2423 start will be returned
2425 start will be returned
2424 if stop is specified, it will consider all the revs from stop
2426 if stop is specified, it will consider all the revs from stop
2425 as if they had no children
2427 as if they had no children
2426 """
2428 """
2427 if start is None and stop is None:
2429 if start is None and stop is None:
2428 if not len(self):
2430 if not len(self):
2429 return [self.nullid]
2431 return [self.nullid]
2430 return self._head_node_ids()
2432 return self._head_node_ids()
2431 if start is None:
2433 if start is None:
2432 start = nullrev
2434 start = nullrev
2433 else:
2435 else:
2434 start = self.rev(start)
2436 start = self.rev(start)
2435
2437
2436 stoprevs = {self.rev(n) for n in stop or []}
2438 stoprevs = {self.rev(n) for n in stop or []}
2437
2439
2438 revs = dagop.headrevssubset(
2440 revs = dagop.headrevssubset(
2439 self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
2441 self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
2440 )
2442 )
2441
2443
2442 return [self.node(rev) for rev in revs]
2444 return [self.node(rev) for rev in revs]
2443
2445
2444 def diffheads(self, start, stop):
2446 def diffheads(self, start, stop):
2445 """return the nodes that make up the difference between
2447 """return the nodes that make up the difference between
2446 heads of revs before `start` and heads of revs before `stop`"""
2448 heads of revs before `start` and heads of revs before `stop`"""
2447 removed, added = self.headrevsdiff(start, stop)
2449 removed, added = self.headrevsdiff(start, stop)
2448 return [self.node(r) for r in removed], [self.node(r) for r in added]
2450 return [self.node(r) for r in removed], [self.node(r) for r in added]
2449
2451
2450 def children(self, node):
2452 def children(self, node):
2451 """find the children of a given node"""
2453 """find the children of a given node"""
2452 c = []
2454 c = []
2453 p = self.rev(node)
2455 p = self.rev(node)
2454 for r in self.revs(start=p + 1):
2456 for r in self.revs(start=p + 1):
2455 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
2457 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
2456 if prevs:
2458 if prevs:
2457 for pr in prevs:
2459 for pr in prevs:
2458 if pr == p:
2460 if pr == p:
2459 c.append(self.node(r))
2461 c.append(self.node(r))
2460 elif p == nullrev:
2462 elif p == nullrev:
2461 c.append(self.node(r))
2463 c.append(self.node(r))
2462 return c
2464 return c
2463
2465
2464 def commonancestorsheads(self, a, b):
2466 def commonancestorsheads(self, a, b):
2465 """calculate all the heads of the common ancestors of nodes a and b"""
2467 """calculate all the heads of the common ancestors of nodes a and b"""
2466 a, b = self.rev(a), self.rev(b)
2468 a, b = self.rev(a), self.rev(b)
2467 ancs = self._commonancestorsheads(a, b)
2469 ancs = self._commonancestorsheads(a, b)
2468 return pycompat.maplist(self.node, ancs)
2470 return pycompat.maplist(self.node, ancs)
2469
2471
2470 def _commonancestorsheads(self, *revs):
2472 def _commonancestorsheads(self, *revs):
2471 """calculate all the heads of the common ancestors of revs"""
2473 """calculate all the heads of the common ancestors of revs"""
2472 try:
2474 try:
2473 ancs = self.index.commonancestorsheads(*revs)
2475 ancs = self.index.commonancestorsheads(*revs)
2474 except (AttributeError, OverflowError): # C implementation failed
2476 except (AttributeError, OverflowError): # C implementation failed
2475 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
2477 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
2476 return ancs
2478 return ancs
2477
2479
2478 def isancestor(self, a, b):
2480 def isancestor(self, a, b):
2479 """return True if node a is an ancestor of node b
2481 """return True if node a is an ancestor of node b
2480
2482
2481 A revision is considered an ancestor of itself."""
2483 A revision is considered an ancestor of itself."""
2482 a, b = self.rev(a), self.rev(b)
2484 a, b = self.rev(a), self.rev(b)
2483 return self.isancestorrev(a, b)
2485 return self.isancestorrev(a, b)
2484
2486
2485 def isancestorrev(self, a, b):
2487 def isancestorrev(self, a, b):
2486 """return True if revision a is an ancestor of revision b
2488 """return True if revision a is an ancestor of revision b
2487
2489
2488 A revision is considered an ancestor of itself.
2490 A revision is considered an ancestor of itself.
2489
2491
2490 The implementation of this is trivial but the use of
2492 The implementation of this is trivial but the use of
2491 reachableroots is not."""
2493 reachableroots is not."""
2492 if a == nullrev:
2494 if a == nullrev:
2493 return True
2495 return True
2494 elif a == b:
2496 elif a == b:
2495 return True
2497 return True
2496 elif a > b:
2498 elif a > b:
2497 return False
2499 return False
2498 return bool(self.reachableroots(a, [b], [a], includepath=False))
2500 return bool(self.reachableroots(a, [b], [a], includepath=False))
2499
2501
2500 def reachableroots(self, minroot, heads, roots, includepath=False):
2502 def reachableroots(self, minroot, heads, roots, includepath=False):
2501 """return (heads(::(<roots> and <roots>::<heads>)))
2503 """return (heads(::(<roots> and <roots>::<heads>)))
2502
2504
2503 If includepath is True, return (<roots>::<heads>)."""
2505 If includepath is True, return (<roots>::<heads>)."""
2504 try:
2506 try:
2505 return self.index.reachableroots2(
2507 return self.index.reachableroots2(
2506 minroot, heads, roots, includepath
2508 minroot, heads, roots, includepath
2507 )
2509 )
2508 except AttributeError:
2510 except AttributeError:
2509 return dagop._reachablerootspure(
2511 return dagop._reachablerootspure(
2510 self.parentrevs, minroot, roots, heads, includepath
2512 self.parentrevs, minroot, roots, heads, includepath
2511 )
2513 )
2512
2514
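# --- Illustrative sketch (editorial, not part of revlog.py) ---
# A pure-Python approximation of the includepath=False case: walk from
# the heads down to minroot and report which of the given roots are
# reached.  (isancestorrev() above uses exactly this: a is an ancestor
# of b iff reachableroots(a, [b], [a]) is non-empty.)

def toy_reachableroots(parentrevs, minroot, heads, roots):
    roots, reached, seen = set(roots), set(), set()
    visit = list(heads)
    while visit:
        r = visit.pop()
        if r < minroot or r in seen:
            continue
        seen.add(r)
        if r in roots:
            reached.add(r)
        visit.extend(parentrevs(r))
    return sorted(reached)

parents = {0: (-1, -1), 1: (0, -1), 2: (1, -1), 3: (0, -1)}
assert toy_reachableroots(parents.__getitem__, 0, [2], [1, 3]) == [1]
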
2513 def ancestor(self, a, b):
2515 def ancestor(self, a, b):
2514 """calculate the "best" common ancestor of nodes a and b"""
2516 """calculate the "best" common ancestor of nodes a and b"""
2515
2517
2516 a, b = self.rev(a), self.rev(b)
2518 a, b = self.rev(a), self.rev(b)
2517 try:
2519 try:
2518 ancs = self.index.ancestors(a, b)
2520 ancs = self.index.ancestors(a, b)
2519 except (AttributeError, OverflowError):
2521 except (AttributeError, OverflowError):
2520 ancs = ancestor.ancestors(self.parentrevs, a, b)
2522 ancs = ancestor.ancestors(self.parentrevs, a, b)
2521 if ancs:
2523 if ancs:
2522 # choose a consistent winner when there's a tie
2524 # choose a consistent winner when there's a tie
2523 return min(map(self.node, ancs))
2525 return min(map(self.node, ancs))
2524 return self.nullid
2526 return self.nullid
2525
2527
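# --- Illustrative sketch (editorial, not part of revlog.py) ---
# The tie-break above: with several equally good common ancestors,
# taking the minimum node keeps the answer deterministic across calls.
# Hypothetical 2-byte node IDs:

candidates = [b'\x9f\x01', b'\x3a\x42']   # same-quality ancestors
assert min(candidates) == b'\x3a\x42'     # the consistent winner
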
2526 def _match(self, id):
2528 def _match(self, id):
2527 if isinstance(id, int):
2529 if isinstance(id, int):
2528 # rev
2530 # rev
2529 return self.node(id)
2531 return self.node(id)
2530 if len(id) == self.nodeconstants.nodelen:
2532 if len(id) == self.nodeconstants.nodelen:
2531 # possibly a binary node
2533 # possibly a binary node
2532 # odds of a binary node being all hex in ASCII are 1 in 10**25
2534 # odds of a binary node being all hex in ASCII are 1 in 10**25
2533 try:
2535 try:
2534 node = id
2536 node = id
2535 self.rev(node) # quick search the index
2537 self.rev(node) # quick search the index
2536 return node
2538 return node
2537 except error.LookupError:
2539 except error.LookupError:
2538 pass # may be partial hex id
2540 pass # may be partial hex id
2539 try:
2541 try:
2540 # str(rev)
2542 # str(rev)
2541 rev = int(id)
2543 rev = int(id)
2542 if b"%d" % rev != id:
2544 if b"%d" % rev != id:
2543 raise ValueError
2545 raise ValueError
2544 if rev < 0:
2546 if rev < 0:
2545 rev = len(self) + rev
2547 rev = len(self) + rev
2546 if rev < 0 or rev >= len(self):
2548 if rev < 0 or rev >= len(self):
2547 raise ValueError
2549 raise ValueError
2548 return self.node(rev)
2550 return self.node(rev)
2549 except (ValueError, OverflowError):
2551 except (ValueError, OverflowError):
2550 pass
2552 pass
2551 if len(id) == 2 * self.nodeconstants.nodelen:
2553 if len(id) == 2 * self.nodeconstants.nodelen:
2552 try:
2554 try:
2553 # a full hex nodeid?
2555 # a full hex nodeid?
2554 node = bin(id)
2556 node = bin(id)
2555 self.rev(node)
2557 self.rev(node)
2556 return node
2558 return node
2557 except (binascii.Error, error.LookupError):
2559 except (binascii.Error, error.LookupError):
2558 pass
2560 pass
2559
2561
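# --- Illustrative sketch (editorial, not part of revlog.py) ---
# The three exact-match forms _match() accepts, in the order it tries
# them: an integer rev, a binary node of full length, and a decimal
# bytes rev.  A simplified classifier over the same shapes (nodelen 20,
# as for sha1; negative revs and full-hex IDs omitted):

def classify(id, nodelen=20):
    if isinstance(id, int):
        return 'rev'
    if len(id) == nodelen:
        return 'maybe-binary-node'
    try:
        if b'%d' % int(id) == id:
            return 'str-rev'
    except ValueError:
        pass
    return 'other'

assert classify(7) == 'rev'
assert classify(b'12') == 'str-rev'
assert classify(b'x' * 20) == 'maybe-binary-node'
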
2560 def _partialmatch(self, id):
2562 def _partialmatch(self, id):
2561 # we don't care about wdirfilenodeids as they should always be full hashes
2563 # we don't care about wdirfilenodeids as they should always be full hashes
2562 maybewdir = self.nodeconstants.wdirhex.startswith(id)
2564 maybewdir = self.nodeconstants.wdirhex.startswith(id)
2563 ambiguous = False
2565 ambiguous = False
2564 try:
2566 try:
2565 partial = self.index.partialmatch(id)
2567 partial = self.index.partialmatch(id)
2566 if partial and self.hasnode(partial):
2568 if partial and self.hasnode(partial):
2567 if maybewdir:
2569 if maybewdir:
2568 # single 'ff...' match in radix tree, ambiguous with wdir
2570 # single 'ff...' match in radix tree, ambiguous with wdir
2569 ambiguous = True
2571 ambiguous = True
2570 else:
2572 else:
2571 return partial
2573 return partial
2572 elif maybewdir:
2574 elif maybewdir:
2573 # no 'ff...' match in radix tree, wdir identified
2575 # no 'ff...' match in radix tree, wdir identified
2574 raise error.WdirUnsupported
2576 raise error.WdirUnsupported
2575 else:
2577 else:
2576 return None
2578 return None
2577 except error.RevlogError:
2579 except error.RevlogError:
2578 # parsers.c radix tree lookup gave multiple matches
2580 # parsers.c radix tree lookup gave multiple matches
2579 # fast path: for unfiltered changelog, radix tree is accurate
2581 # fast path: for unfiltered changelog, radix tree is accurate
2580 if not getattr(self, 'filteredrevs', None):
2582 if not getattr(self, 'filteredrevs', None):
2581 ambiguous = True
2583 ambiguous = True
2582 # fall through to slow path that filters hidden revisions
2584 # fall through to slow path that filters hidden revisions
2583 except (AttributeError, ValueError):
2585 except (AttributeError, ValueError):
2584 # we are pure python, or key is not hex
2586 # we are pure python, or key is not hex
2585 pass
2587 pass
2586 if ambiguous:
2588 if ambiguous:
2587 raise error.AmbiguousPrefixLookupError(
2589 raise error.AmbiguousPrefixLookupError(
2588 id, self.display_id, _(b'ambiguous identifier')
2590 id, self.display_id, _(b'ambiguous identifier')
2589 )
2591 )
2590
2592
2591 if id in self._pcache:
2593 if id in self._pcache:
2592 return self._pcache[id]
2594 return self._pcache[id]
2593
2595
2594 if len(id) <= 40:
2596 if len(id) <= 40:
2595 # hex(node)[:...]
2597 # hex(node)[:...]
2596 l = len(id) // 2 * 2 # grab an even number of digits
2598 l = len(id) // 2 * 2 # grab an even number of digits
2597 try:
2599 try:
2598 # we're dropping the last digit, so let's check that it's hex,
2600 # we're dropping the last digit, so let's check that it's hex,
2599 # to avoid the expensive computation below if it's not
2601 # to avoid the expensive computation below if it's not
2600 if len(id) % 2 > 0:
2602 if len(id) % 2 > 0:
2601 if not (id[-1] in hexdigits):
2603 if not (id[-1] in hexdigits):
2602 return None
2604 return None
2603 prefix = bin(id[:l])
2605 prefix = bin(id[:l])
2604 except binascii.Error:
2606 except binascii.Error:
2605 pass
2607 pass
2606 else:
2608 else:
2607 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
2609 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
                nl = [
                    n for n in nl if hex(n).startswith(id) and self.hasnode(n)
                ]
                if self.nodeconstants.nullhex.startswith(id):
                    nl.append(self.nullid)
                if len(nl) > 0:
                    if len(nl) == 1 and not maybewdir:
                        self._pcache[id] = nl[0]
                        return nl[0]
                    raise error.AmbiguousPrefixLookupError(
                        id, self.display_id, _(b'ambiguous identifier')
                    )
                if maybewdir:
                    raise error.WdirUnsupported
                return None

    def lookup(self, id):
        """locate a node based on:
        - revision number or str(revision number)
        - nodeid or subset of hex nodeid
        """
        n = self._match(id)
        if n is not None:
            return n
        n = self._partialmatch(id)
        if n:
            return n

        raise error.LookupError(id, self.display_id, _(b'no match found'))

    def shortest(self, node, minlength=1):
        """Find the shortest unambiguous prefix that matches node."""

        def isvalid(prefix):
            try:
                matchednode = self._partialmatch(prefix)
            except error.AmbiguousPrefixLookupError:
                return False
            except error.WdirUnsupported:
                # single 'ff...' match
                return True
            if matchednode is None:
                raise error.LookupError(node, self.display_id, _(b'no node'))
            return True

        def maybewdir(prefix):
            return all(c == b'f' for c in pycompat.iterbytestr(prefix))

        hexnode = hex(node)

        def disambiguate(hexnode, minlength):
            """Disambiguate against wdirid."""
            for length in range(minlength, len(hexnode) + 1):
                prefix = hexnode[:length]
                if not maybewdir(prefix):
                    return prefix

        if not getattr(self, 'filteredrevs', None):
            try:
                length = max(self.index.shortest(node), minlength)
                return disambiguate(hexnode, length)
            except error.RevlogError:
                if node != self.nodeconstants.wdirid:
                    raise error.LookupError(
                        node, self.display_id, _(b'no node')
                    )
            except AttributeError:
                # Fall through to pure code
                pass

        if node == self.nodeconstants.wdirid:
            for length in range(minlength, len(hexnode) + 1):
                prefix = hexnode[:length]
                if isvalid(prefix):
                    return prefix

        for length in range(minlength, len(hexnode) + 1):
            prefix = hexnode[:length]
            if isvalid(prefix):
                return disambiguate(hexnode, length)

    def cmp(self, node, text):
        """compare text with a given file revision

        returns True if text is different from what is stored.
        """
        p1, p2 = self.parents(node)
        return storageutil.hashrevisionsha1(text, p1, p2) != node

    def deltaparent(self, rev):
        """return deltaparent of the given revision"""
        base = self.index[rev][3]
        if base == rev:
            return nullrev
        elif self.delta_config.general_delta:
            return base
        else:
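            # legacy (non-general-delta) revlogs always store a delta against
            # the immediately preceding revision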
            return rev - 1

    def issnapshot(self, rev):
        """tells whether rev is a snapshot"""
        ret = self._inner.issnapshot(rev)
        self.issnapshot = self._inner.issnapshot
        return ret

    def snapshotdepth(self, rev):
        """number of snapshots in the chain before this one"""
        if not self.issnapshot(rev):
            raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
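        # the delta chain of a snapshot only goes through snapshots, so the
        # depth is the chain length with this revision itself excluded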
        return len(self._inner._deltachain(rev)[0]) - 1

    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions

        The delta calculated is in binary form and is intended to be written to
        revlog data directly. So this function needs raw revision data.
        """
        if rev1 != nullrev and self.deltaparent(rev2) == rev1:
            return bytes(self._inner._chunk(rev2))

        return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))

    def revision(self, nodeorrev):
        """return an uncompressed revision of a given node or revision
        number.
        """
        return self._revisiondata(nodeorrev)

    def sidedata(self, nodeorrev):
        """a map of extra data related to the changeset but not part of the hash

        This function currently returns a dictionary. However, a more advanced
        mapping object will likely be used in the future for more
        efficient/lazy code.
        """
        # deal with <nodeorrev> argument type
        if isinstance(nodeorrev, int):
            rev = nodeorrev
        else:
            rev = self.rev(nodeorrev)
        return self._sidedata(rev)

    def _rawtext(self, node, rev):
        """return the possibly unvalidated rawtext for a revision

        returns (rev, rawtext, validated)
        """
        # Check if we have the entry in cache
        # The cache entry looks like (node, rev, rawtext)
        if self._inner._revisioncache:
            if self._inner._revisioncache[0] == node:
                return (rev, self._inner._revisioncache[2], True)

        if rev is None:
            rev = self.rev(node)

        return self._inner.raw_text(node, rev)

    def _revisiondata(self, nodeorrev, raw=False):
        # deal with <nodeorrev> argument type
        if isinstance(nodeorrev, int):
            rev = nodeorrev
            node = self.node(rev)
        else:
            node = nodeorrev
            rev = None

        # fast path the special `nullid` rev
        if node == self.nullid:
            return b""

        # ``rawtext`` is the text as stored inside the revlog. Might be the
        # revision or might need to be processed to retrieve the revision.
        rev, rawtext, validated = self._rawtext(node, rev)

        if raw and validated:
            # if we don't want to process the raw text and the raw
            # text is cached, we can exit early.
            return rawtext
        if rev is None:
            rev = self.rev(node)
        # the revlog's flags for this revision
        # (usually alter its state or content)
        flags = self.flags(rev)

        if validated and flags == REVIDX_DEFAULT_FLAGS:
            # no extra flags set, no flag processor runs, text = rawtext
            return rawtext

        if raw:
            validatehash = flagutil.processflagsraw(self, rawtext, flags)
            text = rawtext
        else:
            r = flagutil.processflagsread(self, rawtext, flags)
            text, validatehash = r
        if validatehash:
            self.checkhash(text, node, rev=rev)
        if not validated:
            self._inner._revisioncache = (node, rev, rawtext)

        return text

    def _sidedata(self, rev):
        """Return the sidedata for a given revision number."""
        sidedata_end = None
        if self._docket is not None:
            sidedata_end = self._docket.sidedata_end
        return self._inner.sidedata(rev, sidedata_end)

    def rawdata(self, nodeorrev):
        """return the uncompressed raw data of a given node or revision number."""
        return self._revisiondata(nodeorrev, raw=True)

    def hash(self, text, p1, p2):
        """Compute a node hash.

        Available as a function so that subclasses can replace the hash
        as needed.
        """
        return storageutil.hashrevisionsha1(text, p1, p2)

    def checkhash(self, text, node, p1=None, p2=None, rev=None):
        """Check node hash integrity.

        Available as a function so that subclasses can extend hash mismatch
        behaviors as needed.
        """
        try:
            if p1 is None and p2 is None:
                p1, p2 = self.parents(node)
            if node != self.hash(text, p1, p2):
                # Clear the revision cache on hash failure. The revision cache
                # only stores the raw revision and clearing the cache does have
                # the side-effect that we won't have a cache hit when the raw
                # revision data is accessed. But this case should be rare and
                # it is extra work to teach the cache about the hash
                # verification state.
                if (
                    self._inner._revisioncache
                    and self._inner._revisioncache[0] == node
                ):
                    self._inner._revisioncache = None

                revornode = rev
                if revornode is None:
                    revornode = templatefilters.short(hex(node))
                raise error.RevlogError(
                    _(b"integrity check failed on %s:%s")
                    % (self.display_id, pycompat.bytestr(revornode))
                )
        except error.RevlogError:
            if self.feature_config.censorable and storageutil.iscensoredtext(
                text
            ):
                raise error.CensoredNodeError(self.display_id, node, text)
            raise

    @property
    def _split_index_file(self):
        """the path at which the index of an ongoing splitting operation is expected

        The file will only exist if a splitting operation is in progress, but
        it is always expected at the same location."""
        parts = self.radix.split(b'/')
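        # e.g. a radix of b'data/foo' yields b'data-s/foo.i' below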
        if len(parts) > 1:
            # appends a '-s' suffix to the ``data/`` or ``meta/`` base
            head = parts[0] + b'-s'
            mids = parts[1:-1]
            tail = parts[-1] + b'.i'
            pieces = [head] + mids + [tail]
            return b'/'.join(pieces)
        else:
            # the revlog is stored at the root of the store (changelog or
            # manifest), no risk of collision.
            return self.radix + b'.i.s'

    def _enforceinlinesize(self, tr):
        """Check if the revlog is too big for inline and convert if so.

        This should be called after revisions are added to the revlog. If the
        revlog has grown too large to be an inline revlog, it will convert it
        to use multiple index and data files.
        """
        tiprev = len(self) - 1
        total_size = self.start(tiprev) + self.length(tiprev)
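        # total_size is the end offset of the last revision's data; an inline
        # revlog is split into separate files once this exceeds _maxinline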
        if not self._inline or (self._may_inline and total_size < _maxinline):
            return

        if self._docket is not None:
            msg = b"inline revlog should not have a docket"
            raise error.ProgrammingError(msg)

        # In the common case, we enforce inline size because the revlog has
        # been appended to. And in such case, it must have an initial offset
        # recorded in the transaction.
        troffset = tr.findoffset(self._inner.canonical_index_file)
        pre_touched = troffset is not None
        if not pre_touched and self.target[0] != KIND_CHANGELOG:
            raise error.RevlogError(
                _(b"%s not found in the transaction") % self._indexfile
            )

        tr.addbackup(self._inner.canonical_index_file, for_offset=pre_touched)
        tr.add(self._datafile, 0)

        new_index_file_path = None
        old_index_file_path = self._indexfile
        new_index_file_path = self._split_index_file
        opener = self.opener
        weak_self = weakref.ref(self)

        # the "split" index replaces the real index when the transaction is
        # finalized
        def finalize_callback(tr):
            opener.rename(
                new_index_file_path,
                old_index_file_path,
                checkambig=True,
            )
            maybe_self = weak_self()
            if maybe_self is not None:
                maybe_self._indexfile = old_index_file_path
                maybe_self._inner.index_file = maybe_self._indexfile

        def abort_callback(tr):
            maybe_self = weak_self()
            if maybe_self is not None:
                maybe_self._indexfile = old_index_file_path
                maybe_self._inner.inline = True
                maybe_self._inner.index_file = old_index_file_path

        tr.registertmp(new_index_file_path)
        # we use 001 here to make sure this happens after the finalisation of
        # pending changelog write (using 000). Otherwise the two finalizers
        # would step over each other and delete the changelog.i file.
        if self.target[1] is not None:
            callback_id = b'001-revlog-split-%d-%s' % self.target
        else:
            callback_id = b'001-revlog-split-%d' % self.target[0]
        tr.addfinalize(callback_id, finalize_callback)
        tr.addabort(callback_id, abort_callback)

        self._format_flags &= ~FLAG_INLINE_DATA
        self._inner.split_inline(
            tr,
            self._format_flags | self._format_version,
            new_index_file_path=new_index_file_path,
        )

        self._inline = False
        if new_index_file_path is not None:
            self._indexfile = new_index_file_path

        nodemaputil.setup_persistent_nodemap(tr, self)

    def _nodeduplicatecallback(self, transaction, node):
        """called when trying to add a node already stored."""

    @contextlib.contextmanager
    def reading(self):
        with self._inner.reading():
            yield

    @contextlib.contextmanager
    def _writing(self, transaction):
        if self._trypending:
            msg = b'try to write in a `trypending` revlog: %s'
            msg %= self.display_id
            raise error.ProgrammingError(msg)
        if self._inner.is_writing:
            yield
        else:
            data_end = None
            sidedata_end = None
            if self._docket is not None:
                data_end = self._docket.data_end
                sidedata_end = self._docket.sidedata_end
            with self._inner.writing(
                transaction,
                data_end=data_end,
                sidedata_end=sidedata_end,
            ):
                yield
            if self._docket is not None:
                self._write_docket(transaction)

    @property
    def is_delaying(self):
        return self._inner.is_delaying

    def _write_docket(self, transaction):
        """write the current docket on disk

        Exists as a method to help the changelog implement transaction logic.

        We could also imagine using the same transaction logic for all revlogs
        since dockets are cheap."""
        self._docket.write(transaction)

    def addrevision(
        self,
        text,
        transaction,
        link,
        p1,
        p2,
        cachedelta=None,
        node=None,
        flags=REVIDX_DEFAULT_FLAGS,
        deltacomputer=None,
        sidedata=None,
    ):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        cachedelta - an optional precomputed delta
        node - nodeid of revision; typically node is not specified, and it is
            computed by default as hash(text, p1, p2), however subclasses might
            use different hashing method (and override checkhash() in such case)
        flags - the known flags to set on the revision
        deltacomputer - an optional deltacomputer instance shared between
            multiple calls
        """
        if link == nullrev:
            raise error.RevlogError(
                _(b"attempted to add linkrev -1 to %s") % self.display_id
            )

        if sidedata is None:
            sidedata = {}
        elif sidedata and not self.feature_config.has_side_data:
            raise error.ProgrammingError(
                _(b"trying to add sidedata to a revlog that doesn't support them")
            )

        if flags:
            node = node or self.hash(text, p1, p2)

        rawtext, validatehash = flagutil.processflagswrite(self, text, flags)

        # If the flag processor modifies the revision data, ignore any provided
        # cachedelta.
        if rawtext != text:
            cachedelta = None

        if len(rawtext) > _maxentrysize:
            raise error.RevlogError(
                _(
                    b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
                )
                % (self.display_id, len(rawtext))
            )

        node = node or self.hash(rawtext, p1, p2)
        rev = self.index.get_rev(node)
        if rev is not None:
            return rev

        if validatehash:
            self.checkhash(rawtext, node, p1=p1, p2=p2)

        return self.addrawrevision(
            rawtext,
            transaction,
            link,
            p1,
            p2,
            node,
            flags,
            cachedelta=cachedelta,
            deltacomputer=deltacomputer,
            sidedata=sidedata,
        )

    def addrawrevision(
        self,
        rawtext,
        transaction,
        link,
        p1,
        p2,
        node,
        flags,
        cachedelta=None,
        deltacomputer=None,
        sidedata=None,
    ):
        """add a raw revision with known flags, node and parents
        useful when reusing a revision not stored in this revlog (ex: received
        over wire, or read from an external bundle).
        """
        with self._writing(transaction):
            return self._addrevision(
                node,
                rawtext,
                transaction,
                link,
                p1,
                p2,
                flags,
                cachedelta,
                deltacomputer=deltacomputer,
                sidedata=sidedata,
            )

    def compress(self, data: bytes) -> Tuple[bytes, bytes]:
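        # returns a (header, data) pair; b'u' marks data that is stored
        # uncompressed (see the sidedata handling in _addrevision)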
        return self._inner.compress(data)

    def decompress(self, data):
        return self._inner.decompress(data)

    def _addrevision(
        self,
        node,
        rawtext,
        transaction,
        link,
        p1,
        p2,
        flags,
        cachedelta,
        alwayscache=False,
        deltacomputer=None,
        sidedata=None,
    ):
        """internal function to add revisions to the log

        see addrevision for argument descriptions.

        note: "addrevision" takes non-raw text, "_addrevision" takes raw text.

        if "deltacomputer" is not provided or None, a defaultdeltacomputer will
        be used.

        invariants:
        - rawtext is optional (can be None); if not set, cachedelta must be set.
          if both are set, they must correspond to each other.
        """
        if node == self.nullid:
            raise error.RevlogError(
                _(b"%s: attempt to add null revision") % self.display_id
            )
        if (
            node == self.nodeconstants.wdirid
            or node in self.nodeconstants.wdirfilenodeids
        ):
            raise error.RevlogError(
                _(b"%s: attempt to add wdir revision") % self.display_id
            )
        if self._inner._writinghandles is None:
            msg = b'adding revision outside `revlog._writing` context'
            raise error.ProgrammingError(msg)

        btext = [rawtext]

        curr = len(self)
        prev = curr - 1

        offset = self._get_data_offset(prev)

        if self._concurrencychecker:
            ifh, dfh, sdfh = self._inner._writinghandles
            # XXX no checking for the sidedata file
            if self._inline:
                # offset is "as if" it were in the .d file, so we need to add on
                # the size of the entry metadata.
                self._concurrencychecker(
                    ifh, self._indexfile, offset + curr * self.index.entry_size
                )
            else:
                # Entries in the .i are a consistent size.
                self._concurrencychecker(
                    ifh, self._indexfile, curr * self.index.entry_size
                )
                self._concurrencychecker(dfh, self._datafile, offset)

        p1r, p2r = self.rev(p1), self.rev(p2)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if rawtext is None:
            # need rawtext size, before changed by flag processors, which is
            # the non-raw size. use revlog explicitly to avoid filelog's extra
            # logic that might remove metadata size.
            textlen = mdiff.patchedsize(
                revlog.size(self, cachedelta[0]), cachedelta[1]
            )
        else:
            textlen = len(rawtext)

        if deltacomputer is None:
            write_debug = None
            if self.delta_config.debug_delta:
                write_debug = transaction._report
            deltacomputer = deltautil.deltacomputer(
                self, write_debug=write_debug
            )

        if cachedelta is not None and len(cachedelta) == 2:
            # If the cached delta has no information about how it should be
            # reused, add the default reuse instruction according to the
            # revlog's configuration.
            if (
                self.delta_config.general_delta
                and self.delta_config.lazy_delta_base
            ):
                delta_base_reuse = DELTA_BASE_REUSE_TRY
            else:
                delta_base_reuse = DELTA_BASE_REUSE_NO
            cachedelta = (cachedelta[0], cachedelta[1], delta_base_reuse)

        revinfo = revlogutils.revisioninfo(
            node,
            p1,
            p2,
            btext,
            textlen,
            cachedelta,
            flags,
        )

        deltainfo = deltacomputer.finddeltainfo(revinfo)

        compression_mode = COMP_MODE_INLINE
        if self._docket is not None:
            default_comp = self._docket.default_compression_header
            r = deltautil.delta_compression(default_comp, deltainfo)
            compression_mode, deltainfo = r

        sidedata_compression_mode = COMP_MODE_INLINE
        if sidedata and self.feature_config.has_side_data:
            sidedata_compression_mode = COMP_MODE_PLAIN
            serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
            sidedata_offset = self._docket.sidedata_end
            h, comp_sidedata = self._inner.compress(serialized_sidedata)
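            # keep the compressed sidedata only if it actually shrank and its
            # first byte cannot be mistaken for an uncompressed marker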
            if (
                h != b'u'
                and comp_sidedata[0:1] != b'\0'
                and len(comp_sidedata) < len(serialized_sidedata)
            ):
                assert not h
                if (
                    comp_sidedata[0:1]
                    == self._docket.default_compression_header
                ):
                    sidedata_compression_mode = COMP_MODE_DEFAULT
                    serialized_sidedata = comp_sidedata
                else:
                    sidedata_compression_mode = COMP_MODE_INLINE
                    serialized_sidedata = comp_sidedata
        else:
            serialized_sidedata = b""
            # Don't store the offset if the sidedata is empty, that way
            # we can easily detect empty sidedata and they will be no different
            # than ones we manually add.
            sidedata_offset = 0

        rank = RANK_UNKNOWN
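        # the rank of a revision is the size of its set of ancestors, the
        # revision itself included; it is only computed when the format
        # requests it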
        if self.feature_config.compute_rank:
            if (p1r, p2r) == (nullrev, nullrev):
                rank = 1
            elif p1r != nullrev and p2r == nullrev:
                rank = 1 + self.fast_rank(p1r)
            elif p1r == nullrev and p2r != nullrev:
                rank = 1 + self.fast_rank(p2r)
            else:  # merge node
                if rustdagop is not None and self.index.rust_ext_compat:
                    rank = rustdagop.rank(self.index, p1r, p2r)
                else:
                    pmin, pmax = sorted((p1r, p2r))
                    rank = 1 + self.fast_rank(pmax)
                    rank += sum(1 for _ in self.findmissingrevs([pmax], [pmin]))

        e = revlogutils.entry(
            flags=flags,
            data_offset=offset,
            data_compressed_length=deltainfo.deltalen,
            data_uncompressed_length=textlen,
            data_compression_mode=compression_mode,
            data_delta_base=deltainfo.base,
            link_rev=link,
            parent_rev_1=p1r,
            parent_rev_2=p2r,
            node_id=node,
            sidedata_offset=sidedata_offset,
            sidedata_compressed_length=len(serialized_sidedata),
            sidedata_compression_mode=sidedata_compression_mode,
            rank=rank,
        )

        self.index.append(e)
        entry = self.index.entry_binary(curr)
        if curr == 0 and self._docket is None:
            header = self._format_flags | self._format_version
            header = self.index.pack_header(header)
            entry = header + entry
        self._writeentry(
            transaction,
            entry,
            deltainfo.data,
            link,
            offset,
            serialized_sidedata,
            sidedata_offset,
        )

        rawtext = btext[0]

        if alwayscache and rawtext is None:
            rawtext = deltacomputer.buildtext(revinfo)

        if type(rawtext) == bytes:  # only accept immutable objects
            self._inner._revisioncache = (node, curr, rawtext)
        self._chainbasecache[curr] = deltainfo.chainbase
        return curr

    def _get_data_offset(self, prev):
        """Returns the current offset in the (in-transaction) data file.
        Versions < 2 of the revlog can get this O(1), revlog v2 needs a docket
        file to store that information: since sidedata can be rewritten to the
        end of the data file within a transaction, you can have cases where, for
        example, rev `n` does not have sidedata while rev `n - 1` does, leading
        to `n - 1`'s sidedata being written after `n`'s data.

        TODO cache this in a docket file before getting out of experimental."""
        if self._docket is None:
            return self.end(prev)
        else:
            return self._docket.data_end

    def _writeentry(
        self,
        transaction,
        entry,
        data,
        link,
        offset,
        sidedata,
        sidedata_offset,
    ):
        # Files opened in a+ mode have inconsistent behavior on various
        # platforms. Windows requires that a file positioning call be made
        # when the file handle transitions between reads and writes. See
        # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
        # platforms, Python or the platform itself can be buggy. Some versions
        # of Solaris have been observed to not append at the end of the file
        # if the file was seeked to before the end. See issue4943 for more.
        #
        # We work around this issue by inserting a seek() before writing.
        # Note: This is likely not necessary on Python 3. However, because
        # the file handle is reused for reads and may be seeked there, we need
        # to be careful before changing this.
        index_end = data_end = sidedata_end = None
        if self._docket is not None:
            index_end = self._docket.index_end
            data_end = self._docket.data_end
            sidedata_end = self._docket.sidedata_end

        files_end = self._inner.write_entry(
            transaction,
            entry,
            data,
            link,
            offset,
            sidedata,
            sidedata_offset,
            index_end,
            data_end,
            sidedata_end,
        )
        self._enforceinlinesize(transaction)
        if self._docket is not None:
            self._docket.index_end = files_end[0]
            self._docket.data_end = files_end[1]
            self._docket.sidedata_end = files_end[2]

        nodemaputil.setup_persistent_nodemap(transaction, self)

    def addgroup(
        self,
        deltas,
        linkmapper,
        transaction,
        alwayscache=False,
        addrevisioncb=None,
        duplicaterevisioncb=None,
        debug_info=None,
        delta_base_reuse_policy=None,
    ):
        """
        add a delta group

        given a set of deltas, add them to the revision log. the
        first delta is against its parent, which should be in our
        log, the rest are against the previous delta.

        If ``addrevisioncb`` is defined, it will be called with arguments of
        this revlog and the node that was added.
        """

        if self._adding_group:
            raise error.ProgrammingError(b'cannot nest addgroup() calls')

        # read the default delta-base reuse policy from revlog config if the
        # group did not specify one.
        if delta_base_reuse_policy is None:
            if (
                self.delta_config.general_delta
                and self.delta_config.lazy_delta_base
            ):
                delta_base_reuse_policy = DELTA_BASE_REUSE_TRY
            else:
                delta_base_reuse_policy = DELTA_BASE_REUSE_NO

        self._adding_group = True
        empty = True
        try:
            with self._writing(transaction):
                write_debug = None
                if self.delta_config.debug_delta:
                    write_debug = transaction._report
                deltacomputer = deltautil.deltacomputer(
                    self,
                    write_debug=write_debug,
                    debug_info=debug_info,
                )
                # loop through our set of deltas
                for data in deltas:
                    (
                        node,
                        p1,
                        p2,
                        linknode,
                        deltabase,
                        delta,
                        flags,
                        sidedata,
                    ) = data
                    link = linkmapper(linknode)
                    flags = flags or REVIDX_DEFAULT_FLAGS

                    rev = self.index.get_rev(node)
                    if rev is not None:
                        # this can happen if two branches make the same change
                        self._nodeduplicatecallback(transaction, rev)
                        if duplicaterevisioncb:
                            duplicaterevisioncb(self, rev)
                        empty = False
                        continue

                    for p in (p1, p2):
                        if not self.index.has_node(p):
                            raise error.LookupError(
                                p, self.radix, _(b'unknown parent')
                            )

                    if not self.index.has_node(deltabase):
                        raise error.LookupError(
                            deltabase, self.display_id, _(b'unknown delta base')
                        )

                    baserev = self.rev(deltabase)

                    if baserev != nullrev and self.iscensored(baserev):
                        # if base is censored, delta must be full replacement in a
                        # single patch operation
                        hlen = struct.calcsize(b">lll")
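                        # an mdiff hunk header is three big-endian 32-bit
                        # integers: start, end, and new length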
                        oldlen = self.rawsize(baserev)
                        newlen = len(delta) - hlen
                        if delta[:hlen] != mdiff.replacediffheader(
                            oldlen, newlen
                        ):
                            raise error.CensoredBaseError(
                                self.display_id, self.node(baserev)
                            )

                    if not flags and self._peek_iscensored(baserev, delta):
                        flags |= REVIDX_ISCENSORED

                    # We assume consumers of addrevisioncb will want to retrieve
                    # the added revision, which will require a call to
                    # revision(). revision() will fast path if there is a cache
                    # hit. So, we tell _addrevision() to always cache in this case.
                    # We're only using addgroup() in the context of changegroup
                    # generation so the revision data can always be handled as raw
                    # by the flagprocessor.
                    rev = self._addrevision(
                        node,
                        None,
                        transaction,
                        link,
                        p1,
                        p2,
                        flags,
                        (baserev, delta, delta_base_reuse_policy),
                        alwayscache=alwayscache,
                        deltacomputer=deltacomputer,
                        sidedata=sidedata,
                    )

                    if addrevisioncb:
                        addrevisioncb(self, rev)
                    empty = False
        finally:
            self._adding_group = False
        return not empty

    def iscensored(self, rev):
        """Check if a file revision is censored."""
        if not self.feature_config.censorable:
            return False

        return self.flags(rev) & REVIDX_ISCENSORED

    def _peek_iscensored(self, baserev, delta):
        """Quickly check if a delta produces a censored revision."""
        if not self.feature_config.censorable:
            return False

        return storageutil.deltaiscensored(delta, baserev, self.rawsize)

    def getstrippoint(self, minlink):
        """find the minimum rev that must be stripped to strip the linkrev

        Returns a tuple containing the minimum rev and a set of all revs that
        have linkrevs that will be broken by this strip.
        """
        return storageutil.resolvestripinfo(
            minlink,
            len(self) - 1,
            self.headrevs(),
            self.linkrev,
            self.parentrevs,
        )

    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        rev, _ = self.getstrippoint(minlink)
        if rev == len(self):
            return

        # first truncate the files on disk
        data_end = self.start(rev)
        if not self._inline:
            transaction.add(self._datafile, data_end)
            end = rev * self.index.entry_size
        else:
            end = data_end + (rev * self.index.entry_size)
3574
3576
3575 if self._sidedatafile:
3577 if self._sidedatafile:
3576 sidedata_end = self.sidedata_cut_off(rev)
3578 sidedata_end = self.sidedata_cut_off(rev)
3577 transaction.add(self._sidedatafile, sidedata_end)
3579 transaction.add(self._sidedatafile, sidedata_end)
3578
3580
3579 transaction.add(self._indexfile, end)
3581 transaction.add(self._indexfile, end)
3580 if self._docket is not None:
3582 if self._docket is not None:
3581 # XXX we could, leverage the docket while stripping. However it is
3583 # XXX we could, leverage the docket while stripping. However it is
3582 # not powerfull enough at the time of this comment
3584 # not powerfull enough at the time of this comment
3583 self._docket.index_end = end
3585 self._docket.index_end = end
3584 self._docket.data_end = data_end
3586 self._docket.data_end = data_end
3585 self._docket.sidedata_end = sidedata_end
3587 self._docket.sidedata_end = sidedata_end
3586 self._docket.write(transaction, stripping=True)
3588 self._docket.write(transaction, stripping=True)
3587
3589
3588 # then reset internal state in memory to forget those revisions
3590 # then reset internal state in memory to forget those revisions
3589 self._chaininfocache = util.lrucachedict(500)
3591 self._chaininfocache = util.lrucachedict(500)
3590 self._inner.clear_cache()
3592 self._inner.clear_cache()
3591
3593
3592 del self.index[rev:-1]
3594 del self.index[rev:-1]
3593
3595
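    # Illustrative sketch, not part of the upstream source: how a caller
    # might pair getstrippoint() with strip(). `rl` (a revlog), `tr` (an
    # open transaction) and `minlink` are assumed to exist.
    #
    #     rev, broken = rl.getstrippoint(minlink)
    #     if rev < len(rl):
    #         # revs in `broken` have linkrevs >= minlink; the caller is
    #         # expected to save them and re-add them after the truncation
    #         rl.strip(minlink, tr)
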
    def checksize(self):
        """Check size of index and data files

        return a (dd, di) tuple.
        - dd: extra bytes for the "data" file
        - di: extra bytes for the "index" file

        A healthy revlog will return (0, 0).
        """
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            with self._datafp() as f:
                f.seek(0, io.SEEK_END)
                actual = f.tell()
            dd = actual - expected
        except FileNotFoundError:
            dd = 0

        try:
            f = self.opener(self._indexfile)
            f.seek(0, io.SEEK_END)
            actual = f.tell()
            f.close()
            s = self.index.entry_size
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except FileNotFoundError:
            di = 0

        return (dd, di)

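    # Illustrative sketch, not part of the upstream source: checksize() is
    # the primitive that verify-style code uses to detect truncated or
    # over-long revlog files; `rl` (a revlog) and `ui` are assumed.
    #
    #     dd, di = rl.checksize()
    #     if (dd, di) != (0, 0):
    #         # dd/di count unexpected trailing bytes in the data and
    #         # index files respectively
    #         ui.warn(b'revlog size mismatch: %d/%d extra bytes\n' % (dd, di))
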
    def files(self):
        """return list of files that compose this revlog"""
        res = [self._indexfile]
        if self._docket_file is None:
            if not self._inline:
                res.append(self._datafile)
        else:
            res.append(self._docket_file)
            res.extend(self._docket.old_index_filepaths(include_empty=False))
            if self._docket.data_end:
                res.append(self._datafile)
            res.extend(self._docket.old_data_filepaths(include_empty=False))
            if self._docket.sidedata_end:
                res.append(self._sidedatafile)
            res.extend(self._docket.old_sidedata_filepaths(include_empty=False))
        return res

    def emitrevisions(
        self,
        nodes,
        nodesorder=None,
        revisiondata=False,
        assumehaveparentrevisions=False,
        deltamode=repository.CG_DELTAMODE_STD,
        sidedata_helpers=None,
        debug_info=None,
    ):
        if nodesorder not in (b'nodes', b'storage', b'linear', None):
            raise error.ProgrammingError(
                b'unhandled value for nodesorder: %s' % nodesorder
            )

        if nodesorder is None and not self.delta_config.general_delta:
            nodesorder = b'storage'

        if (
            not self._storedeltachains
            and deltamode != repository.CG_DELTAMODE_PREV
        ):
            deltamode = repository.CG_DELTAMODE_FULL

        return storageutil.emitrevisions(
            self,
            nodes,
            nodesorder,
            revlogrevisiondelta,
            deltaparentfn=self.deltaparent,
            candeltafn=self._candelta,
            rawsizefn=self.rawsize,
            revdifffn=self.revdiff,
            flagsfn=self.flags,
            deltamode=deltamode,
            revisiondata=revisiondata,
            assumehaveparentrevisions=assumehaveparentrevisions,
            sidedata_helpers=sidedata_helpers,
            debug_info=debug_info,
        )

    DELTAREUSEALWAYS = b'always'
    DELTAREUSESAMEREVS = b'samerevs'
    DELTAREUSENEVER = b'never'

    DELTAREUSEFULLADD = b'fulladd'

    DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}

    def clone(
        self,
        tr,
        destrevlog,
        addrevisioncb=None,
        deltareuse=DELTAREUSESAMEREVS,
        forcedeltabothparents=None,
        sidedata_helpers=None,
    ):
        """Copy this revlog to another, possibly with format changes.

        The destination revlog will contain the same revisions and nodes.
        However, it may not be bit-for-bit identical due to e.g. delta
        encoding differences.

        The ``deltareuse`` argument controls how deltas from the existing
        revlog are preserved in the destination revlog. The argument can have
        the following values:

        DELTAREUSEALWAYS
          Deltas will always be reused (if possible), even if the destination
          revlog would not select the same revisions for the delta. This is
          the fastest mode of operation.
        DELTAREUSESAMEREVS
          Deltas will be reused if the destination revlog would pick the same
          revisions for the delta. This mode strikes a balance between speed
          and optimization.
        DELTAREUSENEVER
          Deltas will never be reused. This is the slowest mode of execution.
          This mode can be used to recompute deltas (e.g. if the diff/delta
          algorithm changes).
        DELTAREUSEFULLADD
          Revisions will be re-added as if they were new content. This is
          slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
          e.g. large file detection and handling.

        Delta computation can be slow, so the choice of delta reuse policy
        can significantly affect run time.

        The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
        two extremes. Deltas will be reused if they are appropriate. But if
        the delta could choose a better revision, it will do so. This means
        if you are converting a non-generaldelta revlog to a generaldelta
        revlog, deltas will be recomputed if the delta's parent isn't a
        parent of the revision.

        In addition to the delta policy, the ``forcedeltabothparents``
        argument controls whether deltas are force-computed against both
        parents for merges. If unset, the destination revlog's current
        default is used.

        See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
        `sidedata_helpers`.
        """
        if deltareuse not in self.DELTAREUSEALL:
            raise ValueError(
                _(b'value for deltareuse invalid: %s') % deltareuse
            )

        if len(destrevlog):
            raise ValueError(_(b'destination revlog is not empty'))

        if getattr(self, 'filteredrevs', None):
            raise ValueError(_(b'source revlog has filtered revisions'))
        if getattr(destrevlog, 'filteredrevs', None):
            raise ValueError(_(b'destination revlog has filtered revisions'))

        # lazydelta and lazydeltabase control whether to reuse a cached
        # delta, if possible.
        old_delta_config = destrevlog.delta_config
        destrevlog.delta_config = destrevlog.delta_config.copy()

        try:
            if deltareuse == self.DELTAREUSEALWAYS:
                destrevlog.delta_config.lazy_delta_base = True
                destrevlog.delta_config.lazy_delta = True
            elif deltareuse == self.DELTAREUSESAMEREVS:
                destrevlog.delta_config.lazy_delta_base = False
                destrevlog.delta_config.lazy_delta = True
            elif deltareuse == self.DELTAREUSENEVER:
                destrevlog.delta_config.lazy_delta_base = False
                destrevlog.delta_config.lazy_delta = False

            delta_both_parents = (
                forcedeltabothparents or old_delta_config.delta_both_parents
            )
            destrevlog.delta_config.delta_both_parents = delta_both_parents

            with self.reading(), destrevlog._writing(tr):
                self._clone(
                    tr,
                    destrevlog,
                    addrevisioncb,
                    deltareuse,
                    forcedeltabothparents,
                    sidedata_helpers,
                )

        finally:
            destrevlog.delta_config = old_delta_config

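    # Illustrative sketch, not part of the upstream source: forcing every
    # delta to be recomputed while copying, e.g. after a delta algorithm
    # change. `src`, `dst` (an empty revlog) and `tr` are assumed to exist.
    #
    #     src.clone(tr, dst, deltareuse=src.DELTAREUSENEVER)
    #
    # DELTAREUSEALWAYS would be the fast path for a plain copy, at the cost
    # of inheriting whatever delta choices the source revlog made.
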
    def _clone(
        self,
        tr,
        destrevlog,
        addrevisioncb,
        deltareuse,
        forcedeltabothparents,
        sidedata_helpers,
    ):
        """perform the core duty of `revlog.clone` after parameter processing"""
        write_debug = None
        if self.delta_config.debug_delta:
            write_debug = tr._report
        deltacomputer = deltautil.deltacomputer(
            destrevlog,
            write_debug=write_debug,
        )
        index = self.index
        for rev in self:
            entry = index[rev]

            # Some classes override linkrev to take filtered revs into
            # account. Use raw entry from index.
            flags = entry[0] & 0xFFFF
            linkrev = entry[4]
            p1 = index[entry[5]][7]
            p2 = index[entry[6]][7]
            node = entry[7]

            # (Possibly) reuse the delta from the revlog if allowed and
            # the revlog chunk is a delta.
            cachedelta = None
            rawtext = None
            if deltareuse == self.DELTAREUSEFULLADD:
                text = self._revisiondata(rev)
                sidedata = self.sidedata(rev)

                if sidedata_helpers is not None:
                    (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
                        self, sidedata_helpers, sidedata, rev
                    )
                    flags = flags | new_flags[0] & ~new_flags[1]

                destrevlog.addrevision(
                    text,
                    tr,
                    linkrev,
                    p1,
                    p2,
                    cachedelta=cachedelta,
                    node=node,
                    flags=flags,
                    deltacomputer=deltacomputer,
                    sidedata=sidedata,
                )
            else:
                if destrevlog.delta_config.lazy_delta:
                    dp = self.deltaparent(rev)
                    if dp != nullrev:
                        cachedelta = (dp, bytes(self._inner._chunk(rev)))

                sidedata = None
                if not cachedelta:
                    try:
                        rawtext = self._revisiondata(rev)
                    except error.CensoredNodeError as censored:
                        assert flags & REVIDX_ISCENSORED
                        rawtext = censored.tombstone
                    sidedata = self.sidedata(rev)
                if sidedata is None:
                    sidedata = self.sidedata(rev)

                if sidedata_helpers is not None:
                    (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
                        self, sidedata_helpers, sidedata, rev
                    )
                    flags = flags | new_flags[0] & ~new_flags[1]

                destrevlog._addrevision(
                    node,
                    rawtext,
                    tr,
                    linkrev,
                    p1,
                    p2,
                    flags,
                    cachedelta,
                    deltacomputer=deltacomputer,
                    sidedata=sidedata,
                )

            if addrevisioncb:
                addrevisioncb(self, rev, node)

    def censorrevision(self, tr, censor_nodes, tombstone=b''):
        if self._format_version == REVLOGV0:
            raise error.RevlogError(
                _(b'cannot censor with version %d revlogs')
                % self._format_version
            )
        elif self._format_version == REVLOGV1:
            rewrite.v1_censor(self, tr, censor_nodes, tombstone)
        else:
            rewrite.v2_censor(self, tr, censor_nodes, tombstone)

    def verifyintegrity(self, state) -> Iterable[RevLogProblem]:
        """Verifies the integrity of the revlog.

        Yields ``revlogproblem`` instances describing problems that are
        found.
        """
        dd, di = self.checksize()
        if dd:
            yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
        if di:
            yield revlogproblem(error=_(b'index contains %d extra bytes') % di)

        version = self._format_version

        # The verifier tells us what version revlog we should be.
        if version != state[b'expectedversion']:
            yield revlogproblem(
                warning=_(b"warning: '%s' uses revlog format %d; expected %d")
                % (self.display_id, version, state[b'expectedversion'])
            )

        state[b'skipread'] = set()
        state[b'safe_renamed'] = set()

        for rev in self:
            node = self.node(rev)

            # Verify contents. 4 cases to care about:
            #
            #   common: the most common case
            #   rename: with a rename
            #   meta: file content starts with b'\1\n', the metadata
            #         header defined in filelog.py, but without a rename
            #   ext: content stored externally
            #
            # More formally, their differences are shown below:
            #
            #                       | common | rename | meta  | ext
            #  -------------------------------------------------------
            #  flags()              | 0      | 0      | 0     | not 0
            #  renamed()            | False  | True   | False | ?
            #  rawtext[0:2]=='\1\n' | False  | True   | True  | ?
            #
            # "rawtext" means the raw text stored in revlog data, which
            # could be retrieved by "rawdata(rev)". "text"
            # mentioned below is "revision(rev)".
            #
            # There are 3 different lengths stored physically:
            #  1. L1: rawsize, stored in revlog index
            #  2. L2: len(rawtext), stored in revlog data
            #  3. L3: len(text), stored in revlog data if flags==0, or
            #     possibly somewhere else if flags!=0
            #
            # L1 should be equal to L2. L3 could be different from them.
            # "text" may or may not affect commit hash depending on flag
            # processors (see flagutil.addflagprocessor).
            #
            #              | common | rename | meta  | ext
            # -------------------------------------------------
            # rawsize()    | L1     | L1     | L1    | L1
            # size()       | L1     | L2-LM  | L1(*) | L1 (?)
            # len(rawtext) | L2     | L2     | L2    | L2
            # len(text)    | L2     | L2     | L2    | L3
            # len(read())  | L2     | L2-LM  | L2-LM | L3 (?)
            #
            # LM:  length of metadata, depending on rawtext
            # (*): not ideal, see comment in filelog.size
            # (?): could be "- len(meta)" if the resolved content has
            #      rename metadata
            #
            # Checks needed to be done:
            #  1. length check: L1 == L2, in all cases.
            #  2. hash check: depending on flag processor, we may need to
            #     use either "text" (external), or "rawtext" (in revlog).

            try:
                skipflags = state.get(b'skipflags', 0)
                if skipflags:
                    skipflags &= self.flags(rev)

                _verify_revision(self, skipflags, state, node)

                l1 = self.rawsize(rev)
                l2 = len(self.rawdata(node))

                if l1 != l2:
                    yield revlogproblem(
                        error=_(b'unpacked size is %d, %d expected') % (l2, l1),
                        node=node,
                    )

            except error.CensoredNodeError:
                if state[b'erroroncensored']:
                    yield revlogproblem(
                        error=_(b'censored file data'), node=node
                    )
                state[b'skipread'].add(node)
            except Exception as e:
                yield revlogproblem(
                    error=_(b'unpacking %s: %s')
                    % (short(node), stringutil.forcebytestr(e)),
                    node=node,
                )
                state[b'skipread'].add(node)

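    # Illustrative sketch, not part of the upstream source: consuming the
    # problems yielded above, roughly what a verify-style caller does with
    # an assumed `state` dict as described in the method body.
    #
    #     errors = warnings = 0
    #     for problem in rl.verifyintegrity(state):
    #         if problem.error is not None:
    #             errors += 1
    #         elif problem.warning is not None:
    #             warnings += 1
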
    def storageinfo(
        self,
        exclusivefiles=False,
        sharedfiles=False,
        revisionscount=False,
        trackedsize=False,
        storedsize=False,
    ):
        d = {}

        if exclusivefiles:
            d[b'exclusivefiles'] = [(self.opener, self._indexfile)]
            if not self._inline:
                d[b'exclusivefiles'].append((self.opener, self._datafile))

        if sharedfiles:
            d[b'sharedfiles'] = []

        if revisionscount:
            d[b'revisionscount'] = len(self)

        if trackedsize:
            d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))

        if storedsize:
            d[b'storedsize'] = sum(
                self.opener.stat(path).st_size for path in self.files()
            )

        return d

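    # Illustrative sketch, not part of the upstream source: querying the
    # on-disk footprint of an assumed revlog `rl`; only the requested keys
    # are present in the returned dict.
    #
    #     info = rl.storageinfo(revisionscount=True, storedsize=True)
    #     nrevs, nbytes = info[b'revisionscount'], info[b'storedsize']
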
    def rewrite_sidedata(self, transaction, helpers, startrev, endrev):
        if not self.feature_config.has_side_data:
            return
        # revlog formats with sidedata support do not support inline
        assert not self._inline
        if not helpers[1] and not helpers[2]:
            # Nothing to generate or remove
            return

        new_entries = []
        # append the new sidedata
        with self._writing(transaction):
            ifh, dfh, sdfh = self._inner._writinghandles
            dfh.seek(self._docket.sidedata_end, os.SEEK_SET)

            current_offset = sdfh.tell()
            for rev in range(startrev, endrev + 1):
                entry = self.index[rev]
                new_sidedata, flags = sidedatautil.run_sidedata_helpers(
                    store=self,
                    sidedata_helpers=helpers,
                    sidedata={},
                    rev=rev,
                )

                serialized_sidedata = sidedatautil.serialize_sidedata(
                    new_sidedata
                )

                sidedata_compression_mode = COMP_MODE_INLINE
                if serialized_sidedata and self.feature_config.has_side_data:
                    sidedata_compression_mode = COMP_MODE_PLAIN
                    h, comp_sidedata = self._inner.compress(serialized_sidedata)
                    if (
                        h != b'u'
                        and comp_sidedata[0] != b'\0'
                        and len(comp_sidedata) < len(serialized_sidedata)
                    ):
                        assert not h
                        if (
                            comp_sidedata[0]
                            == self._docket.default_compression_header
                        ):
                            sidedata_compression_mode = COMP_MODE_DEFAULT
                            serialized_sidedata = comp_sidedata
                        else:
                            sidedata_compression_mode = COMP_MODE_INLINE
                            serialized_sidedata = comp_sidedata
                if entry[8] != 0 or entry[9] != 0:
                    # rewriting entries that already have sidedata is not
                    # supported yet, because it introduces garbage data in the
                    # revlog.
                    msg = b"rewriting existing sidedata is not supported yet"
                    raise error.Abort(msg)

                # Apply (potential) flags to add and to remove after running
                # the sidedata helpers
                new_offset_flags = entry[0] | flags[0] & ~flags[1]
                entry_update = (
                    current_offset,
                    len(serialized_sidedata),
                    new_offset_flags,
                    sidedata_compression_mode,
                )

                # the sidedata computation might have moved the file cursors
                # around
                sdfh.seek(current_offset, os.SEEK_SET)
                sdfh.write(serialized_sidedata)
                new_entries.append(entry_update)
                current_offset += len(serialized_sidedata)
            self._docket.sidedata_end = sdfh.tell()

            # rewrite the new index entries
            ifh.seek(startrev * self.index.entry_size)
            for i, e in enumerate(new_entries):
                rev = startrev + i
                self.index.replace_sidedata_info(rev, *e)
                packed = self.index.entry_binary(rev)
                if rev == 0 and self._docket is None:
                    header = self._format_flags | self._format_version
                    header = self.index.pack_header(header)
                    packed = header + packed
                ifh.write(packed)
@@ -1,271 +1,273 b''
# statichttprepo.py - simple http repository class for mercurial
#
# This provides read-only repo access to repositories exported via static http
#
# Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.


import errno

from .i18n import _
from .node import sha1nodeconstants
from . import (
    branchmap,
    changelog,
    error,
    localrepo,
    manifest,
    namespaces,
    pathutil,
    pycompat,
    requirements as requirementsmod,
    url,
    util,
    vfs as vfsmod,
)
from .utils import (
    urlutil,
)

urlerr = util.urlerr
urlreq = util.urlreq


class httprangereader:
    def __init__(self, url, opener):
        # we assume opener has HTTPRangeHandler
        self.url = url
        self.pos = 0
        self.opener = opener
        self.name = url

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()

    def seek(self, pos):
        self.pos = pos

    def read(self, bytes=None):
        req = urlreq.request(pycompat.strurl(self.url))
        end = b''
        if bytes:
            end = self.pos + bytes - 1
        if self.pos or end:
            req.add_header('Range', 'bytes=%d-%s' % (self.pos, end))

        try:
            f = self.opener.open(req)
            data = f.read()
            code = f.code
        except urlerr.httperror as inst:
            num = inst.code == 404 and errno.ENOENT or None
            # Explicitly convert the exception to str as Py3 will try to
            # convert it to the local encoding, and the HTTPResponse
            # instance doesn't support encode.
            raise IOError(num, str(inst))
        except urlerr.urlerror as inst:
            raise IOError(None, inst.reason)

        if code == 200:
            # HTTPRangeHandler does nothing if remote does not support
            # Range headers and returns the full entity. Let's slice it.
            if bytes:
                data = data[self.pos : self.pos + bytes]
            else:
                data = data[self.pos :]
        elif bytes:
            data = data[:bytes]
        self.pos += len(data)
        return data

    def readlines(self):
        return self.read().splitlines(True)

    def __iter__(self):
        return iter(self.readlines())

    def close(self):
        pass

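# Illustrative sketch, not part of the upstream source: fetching a byte
# range through httprangereader. `opener` is assumed to be a urllib-style
# opener with the range handler below installed; the URL is made up.
#
#     with httprangereader(b'http://example.com/repo/.hg/requires', opener) as fp:
#         fp.seek(16)
#         chunk = fp.read(1024)  # bytes 16..1039 via a Range request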

# _RangeError and _HTTPRangeHandler were originally in byterange.py,
# which was itself extracted from urlgrabber. See the last version of
# byterange.py from history if you need more information.
class _RangeError(IOError):
    """Error raised when an unsatisfiable range is requested."""


class _HTTPRangeHandler(urlreq.basehandler):
    """Handler that enables HTTP Range headers.

    This was extremely simple. The Range header is an HTTP feature to
    begin with so all this class does is tell urllib2 that the
    "206 Partial Content" response from the HTTP server is what we
    expected.
    """

    def http_error_206(self, req, fp, code, msg, hdrs):
        # 206 Partial Content Response
        r = urlreq.addinfourl(fp, hdrs, req.get_full_url())
        r.code = code
        r.msg = msg
        return r

    def http_error_416(self, req, fp, code, msg, hdrs):
        # HTTP's Range Not Satisfiable error
        raise _RangeError('Requested Range Not Satisfiable')


def build_opener(ui, authinfo):
    # urllib cannot handle URLs with embedded user or passwd
    urlopener = url.opener(ui, authinfo)
    urlopener.add_handler(_HTTPRangeHandler())

    class statichttpvfs(vfsmod.abstractvfs):
        def __init__(self, base):
            self.base = base
            self.options = {}

        def __call__(self, path, mode=b'r', *args, **kw):
            if mode not in (b'r', b'rb'):
                raise IOError('Permission denied')
            f = b"/".join((self.base, urlreq.quote(path)))
            return httprangereader(f, urlopener)

        def join(self, path, *insidef):
            if path:
                return pathutil.join(self.base, path, *insidef)
            else:
                return self.base

    return statichttpvfs

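
# Illustrative sketch, not part of the upstream source: build_opener()
# returns a vfs *class* closed over the opener, so callers can instantiate
# it per path. `ui` and `authinfo` are assumed to be in scope.
#
#     vfsclass = build_opener(ui, authinfo)
#     vfs = vfsclass(b'http://example.com/repo/.hg')
#     requires = vfs(b'requires').read()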

class statichttppeer(localrepo.localpeer):
    def local(self):
        return None

    def canpush(self):
        return False


class statichttprepository(
    localrepo.localrepository, localrepo.revlogfilestorage
):
    supported = localrepo.localrepository._basesupported

    manifestlog: manifest.ManifestLog

    def __init__(self, ui, path):
        self._url = path
        self.ui = ui

        self.root = path
        u = urlutil.url(path.rstrip(b'/') + b"/.hg")
        self.path, authinfo = u.authinfo()

        vfsclass = build_opener(ui, authinfo)
        self.vfs = vfsclass(self.path)
        self.cachevfs = vfsclass(self.vfs.join(b'cache'))
        self._phasedefaults = []

        self.names = namespaces.namespaces()
        self.filtername = None
        self._extrafilterid = None
        self._wanted_sidedata = set()
        self.features = set()

        try:
            requirements = set(self.vfs.read(b'requires').splitlines())
        except FileNotFoundError:
            requirements = set()

        # check if it is a non-empty old-style repository
        try:
            fp = self.vfs(b"00changelog.i")
            fp.read(1)
            fp.close()
        except FileNotFoundError:
            # we do not care about empty old-style repositories here
            msg = _(b"'%s' does not appear to be an hg repository") % path
            raise error.RepoError(msg)
        if requirementsmod.SHARESAFE_REQUIREMENT in requirements:
            storevfs = vfsclass(self.vfs.join(b'store'))
            requirements |= set(storevfs.read(b'requires').splitlines())

        supportedrequirements = localrepo.gathersupportedrequirements(ui)
        localrepo.ensurerequirementsrecognized(
            requirements, supportedrequirements
        )
        localrepo.ensurerequirementscompatible(ui, requirements)
        self.nodeconstants = sha1nodeconstants
        self.nullid = self.nodeconstants.nullid

        # setup store
        self.store = localrepo.makestore(requirements, self.path, vfsclass)
        self.spath = self.store.path
        self.svfs = self.store.opener
        self.sjoin = self.store.join
        self._filecache = {}
        self.requirements = requirements

        rootmanifest = manifest.manifestrevlog(self.nodeconstants, self.svfs)
        self.manifestlog = manifest.manifestlog(
            self.svfs, self, rootmanifest, self.narrowmatch()
        )
        self.changelog = changelog.changelog(self.svfs)
        self._tags = None
        self.nodetagscache = None
        self._branchcaches = branchmap.BranchMapCache()
        self._revbranchcache = None
        self.encodepats = None
        self.decodepats = None
        self._transref = None
        self._dirstate = None

    def _restrictcapabilities(self, caps):
        caps = super(statichttprepository, self)._restrictcapabilities(caps)
        return caps.difference([b"pushkey"])

    def url(self):
        return self._url

    def local(self):
        return False

    def peer(self, path=None, remotehidden=False):
        return statichttppeer(self, path=path, remotehidden=remotehidden)

    def wlock(self, wait=True):
        raise error.LockUnavailable(
            0,
            pycompat.sysstr(_(b'lock not available')),
            b'lock',
            _(b'cannot lock static-http repository'),
        )

    def lock(self, wait=True):
        raise error.LockUnavailable(
            0,
            pycompat.sysstr(_(b'lock not available')),
            b'lock',
            _(b'cannot lock static-http repository'),
        )

    def _writecaches(self):
        pass  # a statichttprepository is read-only


def make_peer(
    ui, path, create, intents=None, createopts=None, remotehidden=False
):
    if create:
        raise error.Abort(_(b'cannot create new static-http repository'))
    url = path.loc[7:]
    return statichttprepository(ui, url).peer(
        path=path, remotehidden=remotehidden
    )
@@ -1,1250 +1,1250 b''
# store.py - repository store handling for Mercurial
#
# Copyright 2008 Olivia Mackall <olivia@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

import collections
import functools
import os
import re
import stat
import typing

from typing import Generator, List

from .i18n import _
from .thirdparty import attr

# Force pytype to use the non-vendored package
if typing.TYPE_CHECKING:
    # noinspection PyPackageRequirements
    import attr

from .node import hex
from .revlogutils.constants import (
    INDEX_HEADER,
    KIND_CHANGELOG,
    KIND_FILELOG,
    KIND_MANIFESTLOG,
)
from . import (
    changelog,
    error,
    filelog,
    manifest,
    policy,
    pycompat,
    revlog as revlogmod,
    util,
    vfs as vfsmod,
)
from .utils import hashutil

parsers = policy.importmod('parsers')
# how many bytes should be read from fncache in one read
# It is done to prevent loading large fncache files into memory
fncache_chunksize = 10**6


def _match_tracked_entry(entry: "BaseStoreEntry", matcher):
    """parses a fncache entry and returns whether the entry is tracking a path
    matched by matcher or not.

    If matcher is None, returns True"""

    if matcher is None:
        return True

    # TODO: make this safe for other entry types. Currently, the various
    # store.data_entry generators only yield RevlogStoreEntry, so the
    # attributes do exist on `entry`.
    # pytype: disable=attribute-error
    if entry.is_filelog:
        return matcher(entry.target_id)
    elif entry.is_manifestlog:
        return matcher.visitdir(entry.target_id.rstrip(b'/'))
    # pytype: enable=attribute-error
    raise error.ProgrammingError(b"cannot process entry %r" % entry)

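# Illustrative sketch, not part of the upstream source: filtering store
# entries against a narrowspec-style matcher; `entries` (from one of the
# store.data_entry generators) and `matcher` are assumed to exist.
#
#     tracked = [e for e in entries if _match_tracked_entry(e, matcher)]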
71
71
72 # This avoids a collision between a file named foo and a dir named
72 # This avoids a collision between a file named foo and a dir named
73 # foo.i or foo.d
73 # foo.i or foo.d
74 def _encodedir(path):
74 def _encodedir(path):
75 """
75 """
76 >>> _encodedir(b'data/foo.i')
76 >>> _encodedir(b'data/foo.i')
77 'data/foo.i'
77 'data/foo.i'
78 >>> _encodedir(b'data/foo.i/bla.i')
78 >>> _encodedir(b'data/foo.i/bla.i')
79 'data/foo.i.hg/bla.i'
79 'data/foo.i.hg/bla.i'
80 >>> _encodedir(b'data/foo.i.hg/bla.i')
80 >>> _encodedir(b'data/foo.i.hg/bla.i')
81 'data/foo.i.hg.hg/bla.i'
81 'data/foo.i.hg.hg/bla.i'
82 >>> _encodedir(b'data/foo.i\\ndata/foo.i/bla.i\\ndata/foo.i.hg/bla.i\\n')
82 >>> _encodedir(b'data/foo.i\\ndata/foo.i/bla.i\\ndata/foo.i.hg/bla.i\\n')
83 'data/foo.i\\ndata/foo.i.hg/bla.i\\ndata/foo.i.hg.hg/bla.i\\n'
83 'data/foo.i\\ndata/foo.i.hg/bla.i\\ndata/foo.i.hg.hg/bla.i\\n'
84 """
84 """
85 return (
85 return (
86 path.replace(b".hg/", b".hg.hg/")
86 path.replace(b".hg/", b".hg.hg/")
87 .replace(b".i/", b".i.hg/")
87 .replace(b".i/", b".i.hg/")
88 .replace(b".d/", b".d.hg/")
88 .replace(b".d/", b".d.hg/")
89 )
89 )
90
90
91
91
92 encodedir = getattr(parsers, 'encodedir', _encodedir)
92 encodedir = getattr(parsers, 'encodedir', _encodedir)


def decodedir(path):
    """
    >>> decodedir(b'data/foo.i')
    'data/foo.i'
    >>> decodedir(b'data/foo.i.hg/bla.i')
    'data/foo.i/bla.i'
    >>> decodedir(b'data/foo.i.hg.hg/bla.i')
    'data/foo.i.hg/bla.i'
    """
    if b".hg/" not in path:
        return path
    return (
        path.replace(b".d.hg/", b".d/")
        .replace(b".i.hg/", b".i/")
        .replace(b".hg.hg/", b".hg/")
    )


def _reserved():
    """characters that are problematic for filesystems

    * ascii escapes (0..31)
    * ascii hi (126..255)
    * windows specials

    these characters will be escaped by the encode functions
    """
    winreserved = [ord(x) for x in u'\\:*?"<>|']
    for x in range(32):
        yield x
    for x in range(126, 256):
        yield x
    for x in winreserved:
        yield x


def _buildencodefun():
    """
    >>> enc, dec = _buildencodefun()

    >>> enc(b'nothing/special.txt')
    'nothing/special.txt'
    >>> dec(b'nothing/special.txt')
    'nothing/special.txt'

    >>> enc(b'HELLO')
    '_h_e_l_l_o'
    >>> dec(b'_h_e_l_l_o')
    'HELLO'

    >>> enc(b'hello:world?')
    'hello~3aworld~3f'
    >>> dec(b'hello~3aworld~3f')
    'hello:world?'

    >>> enc(b'the\\x07quick\\xADshot')
    'the~07quick~adshot'
    >>> dec(b'the~07quick~adshot')
    'the\\x07quick\\xadshot'
    """
    e = b'_'
    xchr = pycompat.bytechr
    asciistr = list(map(xchr, range(127)))
    capitals = list(range(ord(b"A"), ord(b"Z") + 1))

    cmap = {x: x for x in asciistr}
    for x in _reserved():
        cmap[xchr(x)] = b"~%02x" % x
    for x in capitals + [ord(e)]:
        cmap[xchr(x)] = e + xchr(x).lower()

    dmap = {}
    for k, v in cmap.items():
        dmap[v] = k

    def decode(s):
        i = 0
        while i < len(s):
            for l in range(1, 4):
                try:
                    yield dmap[s[i : i + l]]
                    i += l
                    break
                except KeyError:
                    pass
            else:
                raise KeyError

    return (
        lambda s: b''.join([cmap[s[c : c + 1]] for c in range(len(s))]),
        lambda s: b''.join(list(decode(s))),
    )


_encodefname, _decodefname = _buildencodefun()
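# note: the two functions above are exact inverses: for any byte string s,
# _decodefname(_encodefname(s)) == s, since neither '_' nor '~' maps to
# itself and so no encoded token can be misread as a plain character.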


def encodefilename(s):
    """
    >>> encodefilename(b'foo.i/bar.d/bla.hg/hi:world?/HELLO')
    'foo.i.hg/bar.d.hg/bla.hg.hg/hi~3aworld~3f/_h_e_l_l_o'
    """
    return _encodefname(encodedir(s))


def decodefilename(s):
    """
    >>> decodefilename(b'foo.i.hg/bar.d.hg/bla.hg.hg/hi~3aworld~3f/_h_e_l_l_o')
    'foo.i/bar.d/bla.hg/hi:world?/HELLO'
    """
    return decodedir(_decodefname(s))


def _buildlowerencodefun():
    """
    >>> f = _buildlowerencodefun()
    >>> f(b'nothing/special.txt')
    'nothing/special.txt'
    >>> f(b'HELLO')
    'hello'
    >>> f(b'hello:world?')
    'hello~3aworld~3f'
    >>> f(b'the\\x07quick\\xADshot')
    'the~07quick~adshot'
    """
    xchr = pycompat.bytechr
    cmap = {xchr(x): xchr(x) for x in range(127)}
    for x in _reserved():
        cmap[xchr(x)] = b"~%02x" % x
    for x in range(ord(b"A"), ord(b"Z") + 1):
        cmap[xchr(x)] = xchr(x).lower()

    def lowerencode(s):
        return b"".join([cmap[c] for c in pycompat.iterbytestr(s)])

    return lowerencode


lowerencode = getattr(parsers, 'lowerencode', None) or _buildlowerencodefun()

# Windows reserved names: con, prn, aux, nul, com1..com9, lpt1..lpt9
_winres3 = (b'aux', b'con', b'prn', b'nul')  # length 3
_winres4 = (b'com', b'lpt')  # length 4 (with trailing 1..9)


def _auxencode(path, dotencode):
    """
    Encodes filenames containing names reserved by Windows or which end in
    period or space. Does not touch other single reserved characters c.
    Specifically, c in '\\:*?"<>|' or ord(c) <= 31 are *not* encoded here.
    Additionally encodes space or period at the beginning, if dotencode is
    True. Parameter path is assumed to be all lowercase.
    A segment only needs encoding if a reserved name appears as a
    basename (e.g. "aux", "aux.foo"). A directory or file named "foo.aux"
    doesn't need encoding.

    >>> s = b'.foo/aux.txt/txt.aux/con/prn/nul/foo.'
    >>> _auxencode(s.split(b'/'), True)
    ['~2efoo', 'au~78.txt', 'txt.aux', 'co~6e', 'pr~6e', 'nu~6c', 'foo~2e']
    >>> s = b'.com1com2/lpt9.lpt4.lpt1/conprn/com0/lpt0/foo.'
    >>> _auxencode(s.split(b'/'), False)
    ['.com1com2', 'lp~749.lpt4.lpt1', 'conprn', 'com0', 'lpt0', 'foo~2e']
    >>> _auxencode([b'foo. '], True)
    ['foo.~20']
    >>> _auxencode([b' .foo'], True)
    ['~20.foo']
    """
    for i, n in enumerate(path):
        if not n:
            continue
        if dotencode and n[0] in b'. ':
            n = b"~%02x" % ord(n[0:1]) + n[1:]
            path[i] = n
        else:
            l = n.find(b'.')
            if l == -1:
                l = len(n)
            if (l == 3 and n[:3] in _winres3) or (
                l == 4
                and n[3:4] <= b'9'
                and n[3:4] >= b'1'
                and n[:3] in _winres4
            ):
                # encode third letter ('aux' -> 'au~78')
                ec = b"~%02x" % ord(n[2:3])
                n = n[0:2] + ec + n[3:]
                path[i] = n
        if n[-1] in b'. ':
            # encode last period or space ('foo...' -> 'foo..~2e')
            path[i] = n[:-1] + b"~%02x" % ord(n[-1:])
    return path


_maxstorepathlen = 120
_dirprefixlen = 8
_maxshortdirslen = 8 * (_dirprefixlen + 1) - 4


def _hashencode(path, dotencode):
    digest = hex(hashutil.sha1(path).digest())
    le = lowerencode(path[5:]).split(b'/')  # skips prefix 'data/' or 'meta/'
    parts = _auxencode(le, dotencode)
    basename = parts[-1]
    _root, ext = os.path.splitext(basename)
    sdirs = []
    sdirslen = 0
    for p in parts[:-1]:
        d = p[:_dirprefixlen]
        if d[-1] in b'. ':
            # Windows can't access dirs ending in period or space
            d = d[:-1] + b'_'
        if sdirslen == 0:
            t = len(d)
        else:
            t = sdirslen + 1 + len(d)
        if t > _maxshortdirslen:
            break
        sdirs.append(d)
        sdirslen = t
    dirs = b'/'.join(sdirs)
    if len(dirs) > 0:
        dirs += b'/'
    res = b'dh/' + dirs + digest + ext
    spaceleft = _maxstorepathlen - len(res)
    if spaceleft > 0:
        filler = basename[:spaceleft]
        res = b'dh/' + dirs + filler + digest + ext
    return res
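
# Illustrative example (hypothetical path; the 40-character sha1 digest in
# the middle of the result is elided since its exact value is not shown):
#
#   >>> p = b'data/' + b'x' * 150 + b'/name.i'
#   >>> out = _hashencode(p, True)
#   >>> out.startswith(b'dh/xxxxxxxx/name.i') and len(out) <= _maxstorepathlen
#   True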


def _hybridencode(path, dotencode):
    """encodes path with a length limit

    Encodes all paths that begin with 'data/', according to the following.

    Default encoding (reversible):

    Encodes all uppercase letters 'X' as '_x'. All reserved or illegal
    characters are encoded as '~xx', where xx is the two digit hex code
    of the character (see encodefilename).
    Relevant path components consisting of Windows reserved filenames are
    masked by encoding the third character ('aux' -> 'au~78', see _auxencode).

    Hashed encoding (not reversible):

    If the default-encoded path is longer than _maxstorepathlen, a
    non-reversible hybrid hashing of the path is done instead.
    This encoding uses up to _dirprefixlen characters of all directory
    levels of the lowerencoded path, but not more levels than can fit into
    _maxshortdirslen.
    Then follows the filler followed by the sha digest of the full path.
    The filler is the beginning of the basename of the lowerencoded path
    (the basename is everything after the last path separator). The filler
    is as long as possible, filling in characters from the basename until
    the encoded path has _maxstorepathlen characters (or all chars of the
    basename have been taken).
    The extension (e.g. '.i' or '.d') is preserved.

    The string 'data/' at the beginning is replaced with 'dh/', if the hashed
    encoding was used.
    """
    path = encodedir(path)
    ef = _encodefname(path).split(b'/')
    res = b'/'.join(_auxencode(ef, dotencode))
    if len(res) > _maxstorepathlen:
        res = _hashencode(path, dotencode)
    return res
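
# Illustrative example (hypothetical path): a short path stays in the
# reversible default encoding described above.
#
#   >>> _hybridencode(b'data/FOO/bar:baz.txt.i', False)
#   'data/_f_o_o/bar~3abaz.txt.i'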


def _pathencode(path):
    de = encodedir(path)
    if len(path) > _maxstorepathlen:
        return _hashencode(de, True)
    ef = _encodefname(de).split(b'/')
    res = b'/'.join(_auxencode(ef, True))
    if len(res) > _maxstorepathlen:
        return _hashencode(de, True)
    return res


_pathencode = getattr(parsers, 'pathencode', _pathencode)


def _plainhybridencode(f):
    return _hybridencode(f, False)


def _calcmode(vfs):
    try:
        # files in .hg/ will be created using this mode
        mode = vfs.stat().st_mode
        # avoid some useless chmods
        if (0o777 & ~util.umask) == (0o777 & mode):
            mode = None
    except OSError:
        mode = None
    return mode


_data = [
    b'bookmarks',
    b'narrowspec',
    b'data',
    b'meta',
    b'00manifest.d',
    b'00manifest.i',
    b'00changelog.d',
    b'00changelog.i',
    b'phaseroots',
    b'obsstore',
    b'requires',
]

REVLOG_FILES_EXT = (
    b'.i',
    b'.idx',
    b'.d',
    b'.dat',
    b'.n',
    b'.nd',
    b'.sda',
)
# file extensions that also use a `-SOMELONGIDHASH.ext` form
REVLOG_FILES_LONG_EXT = (
    b'.nd',
    b'.idx',
    b'.dat',
    b'.sda',
)
# files that are "volatile" and might change between listing and streaming
#
# note: the ".nd" files are nodemap data and won't "change" but they might be
# deleted.
REVLOG_FILES_VOLATILE_EXT = (b'.n', b'.nd')

# some exceptions to the above matching
#
# XXX This is currently not in use because of issue6542
EXCLUDED = re.compile(br'.*undo\.[^/]+\.(nd?|i)$')


def is_revlog(f, kind, st):
    if kind != stat.S_IFREG:
        return False
    if f.endswith(REVLOG_FILES_EXT):
        return True
    return False


def is_revlog_file(f):
    if f.endswith(REVLOG_FILES_EXT):
        return True
    return False


@attr.s(slots=True)
class StoreFile:
    """a file matching a store entry"""

    unencoded_path = attr.ib()
    _file_size = attr.ib(default=None)
    is_volatile = attr.ib(default=False)

    def file_size(self, vfs):
        if self._file_size is None:
            if vfs is None:
                msg = b"calling vfs-less file_size without prior call: %s"
                msg %= self.unencoded_path
                raise error.ProgrammingError(msg)
            try:
                self._file_size = vfs.stat(self.unencoded_path).st_size
            except FileNotFoundError:
                self._file_size = 0
        return self._file_size

    @property
    def has_size(self):
        return self._file_size is not None

    def get_stream(self, vfs, copies):
        """return data "stream" information for this file

        (unencoded_file_path, content_iterator, content_size)
        """
        size = self.file_size(None)

        def get_stream():
            actual_path = copies[vfs.join(self.unencoded_path)]
            with open(actual_path, 'rb') as fp:
                yield None  # ready to stream
                if size <= 65536:
                    yield fp.read(size)
                else:
                    yield from util.filechunkiter(fp, limit=size)

        s = get_stream()
        next(s)
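        # note: priming the generator with next() opens the file now, so any
        # error surfaces immediately, while the data itself streams lazily.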
        return (self.unencoded_path, s, size)


@attr.s(slots=True, init=False)
class BaseStoreEntry:
    """An entry in the store

    This is returned by `store.walk` and represents some data in the store."""

    maybe_volatile = True

    def files(self) -> List[StoreFile]:
        raise NotImplementedError

    def get_streams(
        self,
        repo=None,
        vfs=None,
        copies=None,
        max_changeset=None,
        preserve_file_count=False,
    ):
        """return a list of data streams associated with the files for this entry

        return [(unencoded_file_path, content_iterator, content_size), …]
        """
        assert vfs is not None
        return [f.get_stream(vfs, copies) for f in self.files()]


@attr.s(slots=True, init=False)
class SimpleStoreEntry(BaseStoreEntry):
    """A generic entry in the store"""

    is_revlog = False

    maybe_volatile = attr.ib()
    _entry_path = attr.ib()
    _is_volatile = attr.ib(default=False)
    _file_size = attr.ib(default=None)
    _files = attr.ib(default=None)

    def __init__(
        self,
        entry_path,
        is_volatile=False,
        file_size=None,
    ):
        super().__init__()
        self._entry_path = entry_path
        self._is_volatile = is_volatile
        self._file_size = file_size
        self._files = None
        self.maybe_volatile = is_volatile

    def files(self) -> List[StoreFile]:
        if self._files is None:
            self._files = [
                StoreFile(
                    unencoded_path=self._entry_path,
                    file_size=self._file_size,
                    is_volatile=self._is_volatile,
                )
            ]
        return self._files


@attr.s(slots=True, init=False)
class RevlogStoreEntry(BaseStoreEntry):
    """A revlog entry in the store"""

    is_revlog = True

    revlog_type = attr.ib(default=None)
    target_id = attr.ib(default=None)
    maybe_volatile = attr.ib(default=True)
    _path_prefix = attr.ib(default=None)
    _details = attr.ib(default=None)
    _files = attr.ib(default=None)

    def __init__(
        self,
        revlog_type,
        path_prefix,
        target_id,
        details,
    ):
        super().__init__()
        self.revlog_type = revlog_type
        self.target_id = target_id
        self._path_prefix = path_prefix
        assert b'.i' in details, (path_prefix, details)
        for ext in details:
            if ext.endswith(REVLOG_FILES_VOLATILE_EXT):
                self.maybe_volatile = True
                break
        else:
            self.maybe_volatile = False
        self._details = details
        self._files = None

    @property
    def is_changelog(self):
        return self.revlog_type == KIND_CHANGELOG

    @property
    def is_manifestlog(self):
        return self.revlog_type == KIND_MANIFESTLOG

    @property
    def is_filelog(self):
        return self.revlog_type == KIND_FILELOG

    def main_file_path(self):
        """unencoded path of the main revlog file"""
        return self._path_prefix + b'.i'

    def files(self) -> List[StoreFile]:
        if self._files is None:
            self._files = []
            for ext in sorted(self._details, key=_ext_key):
                path = self._path_prefix + ext
                file_size = self._details[ext]
                # files that are "volatile" and might change between
                # listing and streaming
                #
                # note: the ".nd" files are nodemap data and won't "change"
                # but they might be deleted.
                volatile = ext.endswith(REVLOG_FILES_VOLATILE_EXT)
                f = StoreFile(path, file_size, volatile)
                self._files.append(f)
        return self._files

    def get_streams(
        self,
        repo=None,
        vfs=None,
        copies=None,
        max_changeset=None,
        preserve_file_count=False,
    ):
        pre_sized = all(f.has_size for f in self.files())
        if pre_sized and (
            repo is None
            or max_changeset is None
            # This uses revlog-v2, ignore for now
            or any(k.endswith(b'.idx') for k in self._details.keys())
            # This is not inline, no race expected
            or b'.d' in self._details
        ):
            return super().get_streams(
                repo=repo,
                vfs=vfs,
                copies=copies,
                max_changeset=max_changeset,
                preserve_file_count=preserve_file_count,
            )
        elif not preserve_file_count:
            stream = [
                f.get_stream(vfs, copies)
                for f in self.files()
                if not f.unencoded_path.endswith((b'.i', b'.d'))
            ]
            rl = self.get_revlog_instance(repo).get_revlog()
            rl_stream = rl.get_streams(max_changeset)
            stream.extend(rl_stream)
            return stream

        name_to_size = {}
        for f in self.files():
            name_to_size[f.unencoded_path] = f.file_size(None)

        stream = [
            f.get_stream(vfs, copies)
            for f in self.files()
            if not f.unencoded_path.endswith(b'.i')
        ]

        index_path = self._path_prefix + b'.i'

        index_file = None
        try:
            index_file = vfs(index_path)
            header = index_file.read(INDEX_HEADER.size)
            if revlogmod.revlog.is_inline_index(header):
                size = name_to_size[index_path]

                # no split underneath, just return the stream
                def get_stream():
                    fp = index_file
                    try:
                        fp.seek(0)
                        yield None
                        if size <= 65536:
                            yield fp.read(size)
                        else:
                            yield from util.filechunkiter(fp, limit=size)
                    finally:
                        fp.close()

                s = get_stream()
                next(s)
                index_file = None
                stream.append((index_path, s, size))
            else:
                rl = self.get_revlog_instance(repo).get_revlog()
                rl_stream = rl.get_streams(max_changeset, force_inline=True)
                for name, s, size in rl_stream:
                    if name_to_size.get(name, 0) != size:
                        msg = _(b"expected %d bytes but %d provided for %s")
                        msg %= name_to_size.get(name, 0), size, name
                        raise error.Abort(msg)
                stream.extend(rl_stream)
        finally:
            if index_file is not None:
                index_file.close()

        files = self.files()
        assert len(stream) == len(files), (
            stream,
            files,
            self._path_prefix,
            self.target_id,
        )
        return stream

    def get_revlog_instance(self, repo):
        """Obtain a revlog instance from this store entry

        An instance of the appropriate class is returned.
        """
        if self.is_changelog:
            return changelog.changelog(repo.svfs)
        elif self.is_manifestlog:
            mandir = self.target_id
            return manifest.manifestrevlog(
                repo.nodeconstants, repo.svfs, tree=mandir
            )
        else:
            return filelog.filelog(repo.svfs, self.target_id)


def _gather_revlog(files_data):
    """group files per revlog prefix

    This returns a two-level nested dict. The top level key is the revlog
    prefix without extension; the second level maps each file "suffix" seen
    for this revlog to arbitrary file data.
    """
    revlogs = collections.defaultdict(dict)
    for u, value in files_data:
        name, ext = _split_revlog_ext(u)
        revlogs[name][ext] = value
    return sorted(revlogs.items())
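
# Illustrative example (hypothetical names and sizes):
#
#   >>> _gather_revlog([(b'data/foo.i', 64), (b'data/foo.d', 128)])
#   [('data/foo', {'.i': 64, '.d': 128})]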


def _split_revlog_ext(filename):
    """split the revlog file prefix from the variable extension"""
    if filename.endswith(REVLOG_FILES_LONG_EXT):
        char = b'-'
    else:
        char = b'.'
    idx = filename.rfind(char)
    return filename[:idx], filename[idx:]
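
# Illustrative example (hypothetical name): extensions listed in
# REVLOG_FILES_LONG_EXT carry a `-SOMELONGIDHASH` part, so the split
# happens at the last '-' instead of the last '.':
#
#   >>> _split_revlog_ext(b'data/foo-1234abcd.nd')
#   ('data/foo', '-1234abcd.nd')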


def _ext_key(ext):
    """a key to order revlog suffixes

    important to issue .i after the other entries."""
    # the only important part of this order is to keep the `.i` last.
    if ext.endswith(b'.n'):
        return (0, ext)
    elif ext.endswith(b'.nd'):
        return (10, ext)
    elif ext.endswith(b'.d'):
        return (20, ext)
    elif ext.endswith(b'.i'):
        return (50, ext)
    else:
        return (40, ext)
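
# Illustrative example: sorting a revlog's extensions with this key always
# places the `.i` index last.
#
#   >>> sorted([b'.i', b'.d', b'.n', b'.nd'], key=_ext_key)
#   ['.n', '.nd', '.d', '.i']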


class basicstore:
    '''base class for local repository stores'''

    def __init__(self, path, vfstype):
        vfs = vfstype(path)
        self.path = vfs.base
        self.createmode = _calcmode(vfs)
        vfs.createmode = self.createmode
        self.rawvfs = vfs
        self.vfs = vfsmod.filtervfs(vfs, encodedir)
        self.opener = self.vfs

    def join(self, f):
        return self.path + b'/' + encodedir(f)

    def _walk(self, relpath, recurse, undecodable=None):
        '''yields (revlog_type, unencoded, size)'''
        path = self.path
        if relpath:
            path += b'/' + relpath
        striplen = len(self.path) + 1
        l = []
        if self.rawvfs.isdir(path):
            visit = [path]
            readdir = self.rawvfs.readdir
            while visit:
                p = visit.pop()
                for f, kind, st in readdir(p, stat=True):
                    fp = p + b'/' + f
                    if is_revlog(f, kind, st):
                        n = util.pconvert(fp[striplen:])
                        l.append((decodedir(n), st.st_size))
                    elif kind == stat.S_IFDIR and recurse:
                        visit.append(fp)

        l.sort()
        return l

    def changelog(self, trypending, concurrencychecker=None):
        return changelog.changelog(
            self.vfs,
            trypending=trypending,
            concurrencychecker=concurrencychecker,
        )

    def manifestlog(self, repo, storenarrowmatch) -> manifest.ManifestLog:
        rootstore = manifest.manifestrevlog(repo.nodeconstants, self.vfs)
        return manifest.manifestlog(self.vfs, repo, rootstore, storenarrowmatch)

    def data_entries(
        self, matcher=None, undecodable=None
    ) -> Generator[BaseStoreEntry, None, None]:
        """Like walk, but excluding the changelog and root manifest.

        When [undecodable] is None, revlog names that can't be
        decoded cause an exception. When it is provided, it should
        be a list and the filenames that can't be decoded are added
        to it instead. This is very rarely needed."""
        dirs = [
            (b'data', KIND_FILELOG, False),
            (b'meta', KIND_MANIFESTLOG, True),
        ]
        for base_dir, rl_type, strip_filename in dirs:
            files = self._walk(base_dir, True, undecodable=undecodable)
            for revlog, details in _gather_revlog(files):
                revlog_target_id = revlog.split(b'/', 1)[1]
                if strip_filename and b'/' in revlog:
                    revlog_target_id = revlog_target_id.rsplit(b'/', 1)[0]
                    revlog_target_id += b'/'
                yield RevlogStoreEntry(
                    path_prefix=revlog,
                    revlog_type=rl_type,
                    target_id=revlog_target_id,
                    details=details,
                )

    def top_entries(
        self, phase=False, obsolescence=False
    ) -> Generator[BaseStoreEntry, None, None]:
        if phase and self.vfs.exists(b'phaseroots'):
            yield SimpleStoreEntry(
                entry_path=b'phaseroots',
                is_volatile=True,
            )

        if obsolescence and self.vfs.exists(b'obsstore'):
            # XXX if we had the file size it could be non-volatile
            yield SimpleStoreEntry(
                entry_path=b'obsstore',
                is_volatile=True,
            )

        files = reversed(self._walk(b'', False))

        changelogs = collections.defaultdict(dict)
        manifestlogs = collections.defaultdict(dict)

        for u, s in files:
            if u.startswith(b'00changelog'):
                name, ext = _split_revlog_ext(u)
                changelogs[name][ext] = s
            elif u.startswith(b'00manifest'):
                name, ext = _split_revlog_ext(u)
                manifestlogs[name][ext] = s
            else:
                yield SimpleStoreEntry(
                    entry_path=u,
                    is_volatile=False,
                    file_size=s,
                )
        # yield manifest before changelog
        top_rl = [
            (manifestlogs, KIND_MANIFESTLOG),
            (changelogs, KIND_CHANGELOG),
        ]
        assert len(manifestlogs) <= 1
        assert len(changelogs) <= 1
        for data, revlog_type in top_rl:
            for revlog, details in sorted(data.items()):
                yield RevlogStoreEntry(
                    path_prefix=revlog,
                    revlog_type=revlog_type,
                    target_id=b'',
                    details=details,
                )

    def walk(
        self, matcher=None, phase=False, obsolescence=False
    ) -> Generator[BaseStoreEntry, None, None]:
        """return files related to data storage (ie: revlogs)

        yields instances of BaseStoreEntry subclasses

        if a matcher is passed, only storage files of tracked paths
        matching the matcher are yielded
        """
        # yield data files first
        for x in self.data_entries(matcher):
            yield x
        for x in self.top_entries(phase=phase, obsolescence=obsolescence):
            yield x

    def copylist(self):
        return _data

    def write(self, tr):
        pass

    def invalidatecaches(self):
        pass

    def markremoved(self, fn):
        pass

    def __contains__(self, path):
        '''Checks if the store contains path'''
        path = b"/".join((b"data", path))
        # file?
        if self.vfs.exists(path + b".i"):
            return True
        # dir?
        if not path.endswith(b"/"):
            path = path + b"/"
        return self.vfs.exists(path)


class encodedstore(basicstore):
    def __init__(self, path, vfstype):
        vfs = vfstype(path + b'/store')
        self.path = vfs.base
        self.createmode = _calcmode(vfs)
        vfs.createmode = self.createmode
        self.rawvfs = vfs
        self.vfs = vfsmod.filtervfs(vfs, encodefilename)
        self.opener = self.vfs

    def _walk(self, relpath, recurse, undecodable=None):
        old = super()._walk(relpath, recurse)
        new = []
        for f1, value in old:
            try:
                f2 = decodefilename(f1)
            except KeyError:
                if undecodable is None:
                    msg = _(b'undecodable revlog name %s') % f1
                    raise error.StorageError(msg)
                else:
                    undecodable.append(f1)
                    continue
            new.append((f2, value))
        return new

    def data_entries(
        self, matcher=None, undecodable=None
    ) -> Generator[BaseStoreEntry, None, None]:
        entries = super(encodedstore, self).data_entries(
            undecodable=undecodable
        )
        for entry in entries:
            if _match_tracked_entry(entry, matcher):
                yield entry

    def join(self, f):
        return self.path + b'/' + encodefilename(f)

    def copylist(self):
        return [b'requires', b'00changelog.i'] + [b'store/' + f for f in _data]


class fncache:
    # the filename used to be partially encoded
    # hence the encodedir/decodedir dance
    def __init__(self, vfs):
        self.vfs = vfs
        self._ignores = set()
        self.entries = None
        self._dirty = False
        # set of new additions to fncache
        self.addls = set()

    def ensureloaded(self, warn=None):
        """read the fncache file if not already read.

        If the file on disk is corrupted, raise. If warn is provided,
        warn and keep going instead."""
        if self.entries is None:
            self._load(warn)

    def _load(self, warn=None):
        '''fill the entries from the fncache file'''
        self._dirty = False
        try:
            fp = self.vfs(b'fncache', mode=b'rb')
        except IOError:
            # skip nonexistent file
            self.entries = set()
            return

        self.entries = set()
        chunk = b''
        for c in iter(functools.partial(fp.read, fncache_chunksize), b''):
            chunk += c
            try:
                p = chunk.rindex(b'\n')
                self.entries.update(decodedir(chunk[: p + 1]).splitlines())
                chunk = chunk[p + 1 :]
            except ValueError:
                # substring '\n' not found, maybe the entry is bigger than the
                # chunksize, so let's keep iterating
                pass

        if chunk:
            msg = _(b"fncache does not end with a newline")
            if warn:
                warn(msg + b'\n')
            else:
                raise error.Abort(
                    msg,
                    hint=_(
                        b"use 'hg debugrebuildfncache' to "
                        b"rebuild the fncache"
                    ),
                )
        self._checkentries(fp, warn)
        fp.close()

    def _checkentries(self, fp, warn):
        """make sure there is no empty string in entries"""
        if b'' in self.entries:
            fp.seek(0)
            for n, line in enumerate(fp):
                if not line.rstrip(b'\n'):
                    t = _(b'invalid entry in fncache, line %d') % (n + 1)
                    if warn:
                        warn(t + b'\n')
                    else:
                        raise error.Abort(t)

    def write(self, tr):
        if self._dirty:
            assert self.entries is not None
            self.entries = self.entries | self.addls
            self.addls = set()
            tr.addbackup(b'fncache')
            fp = self.vfs(b'fncache', mode=b'wb', atomictemp=True)
            if self.entries:
                fp.write(encodedir(b'\n'.join(self.entries) + b'\n'))
            fp.close()
            self._dirty = False
        if self.addls:
            # if we have just new entries, let's append them to the fncache
            tr.addbackup(b'fncache')
            fp = self.vfs(b'fncache', mode=b'ab', atomictemp=True)
            if self.addls:
                fp.write(encodedir(b'\n'.join(self.addls) + b'\n'))
            fp.close()
            self.entries = None
            self.addls = set()
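
    # note: removals mark the cache dirty and force the full rewrite above,
    # while pure additions take the cheaper append-only path.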
1073
1073
1074 def addignore(self, fn):
1074 def addignore(self, fn):
1075 self._ignores.add(fn)
1075 self._ignores.add(fn)
1076
1076
1077 def add(self, fn):
1077 def add(self, fn):
1078 if fn in self._ignores:
1078 if fn in self._ignores:
1079 return
1079 return
1080 if self.entries is None:
1080 if self.entries is None:
1081 self._load()
1081 self._load()
1082 if fn not in self.entries:
1082 if fn not in self.entries:
1083 self.addls.add(fn)
1083 self.addls.add(fn)
1084
1084
1085 def remove(self, fn):
1085 def remove(self, fn):
1086 if self.entries is None:
1086 if self.entries is None:
1087 self._load()
1087 self._load()
1088 if fn in self.addls:
1088 if fn in self.addls:
1089 self.addls.remove(fn)
1089 self.addls.remove(fn)
1090 return
1090 return
1091 try:
1091 try:
1092 self.entries.remove(fn)
1092 self.entries.remove(fn)
1093 self._dirty = True
1093 self._dirty = True
1094 except KeyError:
1094 except KeyError:
1095 pass
1095 pass
1096
1096
1097 def __contains__(self, fn):
1097 def __contains__(self, fn):
1098 if fn in self.addls:
1098 if fn in self.addls:
1099 return True
1099 return True
1100 if self.entries is None:
1100 if self.entries is None:
1101 self._load()
1101 self._load()
1102 return fn in self.entries
1102 return fn in self.entries
1103
1103
1104 def __iter__(self):
1104 def __iter__(self):
1105 if self.entries is None:
1105 if self.entries is None:
1106 self._load()
1106 self._load()
1107 return iter(self.entries | self.addls)
1107 return iter(self.entries | self.addls)
1108
1108
1109
1109
1110 class _fncachevfs(vfsmod.proxyvfs):
1110 class _fncachevfs(vfsmod.proxyvfs):
1111 def __init__(self, vfs, fnc, encode):
1111 def __init__(self, vfs, fnc, encode):
1112 vfsmod.proxyvfs.__init__(self, vfs)
1112 vfsmod.proxyvfs.__init__(self, vfs)
1113 self.fncache = fnc
1113 self.fncache = fnc
1114 self.encode = encode
1114 self.encode = encode
1115
1115
1116 def __call__(self, path, mode=b'r', *args, **kw):
1116 def __call__(self, path, mode=b'r', *args, **kw):
1117 encoded = self.encode(path)
1117 encoded = self.encode(path)
1118 if (
1118 if (
1119 mode not in (b'r', b'rb')
1119 mode not in (b'r', b'rb')
1120 and (path.startswith(b'data/') or path.startswith(b'meta/'))
1120 and (path.startswith(b'data/') or path.startswith(b'meta/'))
1121 and is_revlog_file(path)
1121 and is_revlog_file(path)
1122 ):
1122 ):
1123 # do not trigger a fncache load when adding a file that already is
1123 # do not trigger a fncache load when adding a file that already is
1124 # known to exist.
1124 # known to exist.
1125 notload = self.fncache.entries is None and (
1125 notload = self.fncache.entries is None and (
1126 # if the file has size zero, it should be considered as missing.
1126 # if the file has size zero, it should be considered as missing.
1127 # Such zero-size files are the result of truncation when a
1127 # Such zero-size files are the result of truncation when a
1128 # transaction is aborted.
1128 # transaction is aborted.
1129 self.vfs.exists(encoded)
1129 self.vfs.exists(encoded)
1130 and self.vfs.stat(encoded).st_size
1130 and self.vfs.stat(encoded).st_size
1131 )
1131 )
1132 if not notload:
1132 if not notload:
1133 self.fncache.add(path)
1133 self.fncache.add(path)
1134 return self.vfs(encoded, mode, *args, **kw)
1134 return self.vfs(encoded, mode, *args, **kw)
1135
1135
1136 def join(self, path):
1136 def join(self, path):
1137 if path:
1137 if path:
1138 return self.vfs.join(self.encode(path))
1138 return self.vfs.join(self.encode(path))
1139 else:
1139 else:
1140 return self.vfs.join(path)
1140 return self.vfs.join(path)
1141
1141
    def register_file(self, path):
        """generic hook point to let the fncache steer its stew"""
        if path.startswith(b'data/') or path.startswith(b'meta/'):
            self.fncache.add(path)


class fncachestore(basicstore):
    def __init__(self, path, vfstype, dotencode):
        if dotencode:
            encode = _pathencode
        else:
            encode = _plainhybridencode
        self.encode = encode
        vfs = vfstype(path + b'/store')
        self.path = vfs.base
        self.pathsep = self.path + b'/'
        self.createmode = _calcmode(vfs)
        vfs.createmode = self.createmode
        self.rawvfs = vfs
        fnc = fncache(vfs)
        self.fncache = fnc
        self.vfs = _fncachevfs(vfs, fnc, encode)
        self.opener = self.vfs

    def join(self, f):
        return self.pathsep + self.encode(f)

    def getsize(self, path):
        return self.rawvfs.stat(path).st_size

    def data_entries(
        self, matcher=None, undecodable=None
    ) -> Generator[BaseStoreEntry, None, None]:
        # Note: all files in fncache should be revlog related; however the
        # fncache might contain non-revlog files added by previous versions
        # of Mercurial.
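        # Illustrative (editorial, example paths made up): per the branches
        # below, a filelog such as b'data/foo.txt' keeps the path tail as
        # its target_id (b'foo.txt'), while a tree-manifest revlog drops
        # the b'meta/' prefix and the trailing b'00manifest' component,
        # e.g. b'meta/foo/bar/00manifest' maps to target_id b'foo/bar/'.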
        files = ((f, None) for f in self.fncache if is_revlog_file(f))
        by_revlog = _gather_revlog(files)
        for revlog, details in by_revlog:
            if revlog.startswith(b'data/'):
                rl_type = KIND_FILELOG
                revlog_target_id = revlog.split(b'/', 1)[1]
            elif revlog.startswith(b'meta/'):
                rl_type = KIND_MANIFESTLOG
                # drop the initial directory and the `00manifest` file part
                tmp = revlog.split(b'/', 1)[1]
                revlog_target_id = tmp.rsplit(b'/', 1)[0] + b'/'
            else:
                # unreachable
                assert False, revlog
            entry = RevlogStoreEntry(
                path_prefix=revlog,
                revlog_type=rl_type,
                target_id=revlog_target_id,
                details=details,
            )
            if _match_tracked_entry(entry, matcher):
                yield entry

    def copylist(self):
        d = (
            b'bookmarks',
            b'narrowspec',
            b'data',
            b'meta',
            b'dh',
            b'fncache',
            b'phaseroots',
            b'obsstore',
            b'00manifest.d',
            b'00manifest.i',
            b'00changelog.d',
            b'00changelog.i',
            b'requires',
        )
        return [b'requires', b'00changelog.i'] + [b'store/' + f for f in d]

    def write(self, tr):
        self.fncache.write(tr)

    def invalidatecaches(self):
        self.fncache.entries = None
        self.fncache.addls = set()

    def markremoved(self, fn):
        self.fncache.remove(fn)

    def _exists(self, f):
        ef = self.encode(f)
        try:
            self.getsize(ef)
            return True
        except FileNotFoundError:
            return False

    def __contains__(self, path):
        '''Checks if the store contains path'''
        path = b"/".join((b"data", path))
        # check for files (exact match)
        e = path + b'.i'
        if e in self.fncache and self._exists(e):
            return True
        # now check for directories (prefix match)
        if not path.endswith(b'/'):
            path += b'/'
        for e in self.fncache:
            if e.startswith(path) and self._exists(e):
                return True
        return False
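
    # Editorial note (illustrative): for path b'foo', the exact check looks
    # for b'data/foo.i', while the prefix check treats b'data/foo/' as a
    # directory and matches any tracked file stored beneath it.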
@@ -1,344 +1,351 b''
# unionrepo.py - repository class for viewing union of repository changesets
#
# Derived from bundlerepo.py
# Copyright 2006, 2007 Benoit Boissinot <bboissin@gmail.com>
# Copyright 2013 Unity Technologies, Mads Kiilerich <madski@unity3d.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

"""Repository class for "in-memory pull" of one local repository to another,
allowing operations like diff and log with revsets.
"""

import contextlib


from .i18n import _

from . import (
    changelog,
    cmdutil,
    encoding,
    error,
    filelog,
    localrepo,
    manifest,
    mdiff,
    pathutil,
    revlog,
    util,
    vfs as vfsmod,
)

from .revlogutils import (
    constants as revlog_constants,
)


class unionrevlog(revlog.revlog):
    def __init__(self, opener, radix, revlog2, linkmapper):
        # How it works:
        # To retrieve a revision, we just need to know the node id so we can
        # look it up in revlog2.
        #
        # To differentiate a rev in the second revlog from a rev in the revlog,
        # we check revision against repotiprev.
        opener = vfsmod.readonlyvfs(opener)
        target = getattr(revlog2, 'target', None)
        if target is None:
            # a revlog wrapper, e.g. a manifestlog that is not an actual revlog
            target = revlog2._revlog.target
        revlog.revlog.__init__(self, opener, target=target, radix=radix)
        self.revlog2 = revlog2

54
55 n = len(self)
55 n = len(self)
56 self.repotiprev = n - 1
56 self.repotiprev = n - 1
57 self.bundlerevs = set() # used by 'bundle()' revset expression
57 self.bundlerevs = set() # used by 'bundle()' revset expression
58 for rev2 in self.revlog2:
58 for rev2 in self.revlog2:
59 rev = self.revlog2.index[rev2]
59 rev = self.revlog2.index[rev2]
60 # rev numbers - in revlog2, very different from self.rev
60 # rev numbers - in revlog2, very different from self.rev
61 (
61 (
62 _start,
62 _start,
63 _csize,
63 _csize,
64 rsize,
64 rsize,
65 base,
65 base,
66 linkrev,
66 linkrev,
67 p1rev,
67 p1rev,
68 p2rev,
68 p2rev,
69 node,
69 node,
70 _sdo,
70 _sdo,
71 _sds,
71 _sds,
72 _dcm,
72 _dcm,
73 _sdcm,
73 _sdcm,
74 rank,
74 rank,
75 ) = rev
75 ) = rev
76 flags = _start & 0xFFFF
76 flags = _start & 0xFFFF
77
77
78 if linkmapper is None: # link is to same revlog
78 if linkmapper is None: # link is to same revlog
79 assert linkrev == rev2 # we never link back
79 assert linkrev == rev2 # we never link back
80 link = n
80 link = n
81 else: # rev must be mapped from repo2 cl to unified cl by linkmapper
81 else: # rev must be mapped from repo2 cl to unified cl by linkmapper
82 link = linkmapper(linkrev)
82 link = linkmapper(linkrev)
83
83
84 if linkmapper is not None: # link is to same revlog
84 if linkmapper is not None: # link is to same revlog
85 base = linkmapper(base)
85 base = linkmapper(base)
86
86
87 this_rev = self.index.get_rev(node)
87 this_rev = self.index.get_rev(node)
88 if this_rev is not None:
88 if this_rev is not None:
89 # this happens for the common revlog revisions
89 # this happens for the common revlog revisions
90 self.bundlerevs.add(this_rev)
90 self.bundlerevs.add(this_rev)
91 continue
91 continue
92
92
93 p1node = self.revlog2.node(p1rev)
93 p1node = self.revlog2.node(p1rev)
94 p2node = self.revlog2.node(p2rev)
94 p2node = self.revlog2.node(p2rev)
95
95
96 # TODO: it's probably wrong to set compressed length to -1, but
96 # TODO: it's probably wrong to set compressed length to -1, but
97 # I have no idea if csize is valid in the base revlog context.
97 # I have no idea if csize is valid in the base revlog context.
98 e = (
98 e = (
99 flags,
99 flags,
100 -1,
100 -1,
101 rsize,
101 rsize,
102 base,
102 base,
103 link,
103 link,
104 self.rev(p1node),
104 self.rev(p1node),
105 self.rev(p2node),
105 self.rev(p2node),
106 node,
106 node,
107 0, # sidedata offset
107 0, # sidedata offset
108 0, # sidedata size
108 0, # sidedata size
109 revlog_constants.COMP_MODE_INLINE,
109 revlog_constants.COMP_MODE_INLINE,
110 revlog_constants.COMP_MODE_INLINE,
110 revlog_constants.COMP_MODE_INLINE,
111 rank,
111 rank,
112 )
112 )
113 self.index.append(e)
113 self.index.append(e)
114 self.bundlerevs.add(n)
114 self.bundlerevs.add(n)
115 n += 1
115 n += 1
116
116
    @contextlib.contextmanager
    def reading(self):
        if 0 <= len(self.bundlerevs) < len(self.index):
            read_1 = super().reading
        else:
            read_1 = util.nullcontextmanager
        if 0 < len(self.bundlerevs):
            read_2 = self.revlog2.reading
        else:
            read_2 = util.nullcontextmanager
        with read_1(), read_2():
            yield
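
    # Editorial note: the guards above avoid opening a side that has nothing
    # to read, e.g. read_2 is a null context manager whenever revlog2
    # contributed no revisions.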
    def _chunk(self, rev):
        if rev <= self.repotiprev:
            return revlog.revlog._chunk(self, rev)
        return self.revlog2._chunk(self.node(rev))

    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions"""
        if rev1 > self.repotiprev and rev2 > self.repotiprev:
            return self.revlog2.revdiff(
                self.revlog2.rev(self.node(rev1)),
                self.revlog2.rev(self.node(rev2)),
            )
        elif rev1 <= self.repotiprev and rev2 <= self.repotiprev:
            return super(unionrevlog, self).revdiff(rev1, rev2)

        return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))
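
    # Editorial note: when rev1 and rev2 straddle repotiprev the two sides
    # cannot share a stored delta, hence the full-text diff fallback above.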

    def _revisiondata(self, nodeorrev, raw=False):
        if isinstance(nodeorrev, int):
            rev = nodeorrev
            node = self.node(rev)
        else:
            node = nodeorrev
            rev = self.rev(node)

        if rev > self.repotiprev:
            # work around manifestrevlog NOT being a revlog
            revlog2 = getattr(self.revlog2, '_revlog', self.revlog2)
            func = revlog2._revisiondata
        else:
            func = super(unionrevlog, self)._revisiondata
        return func(node, raw=raw)
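    # Editorial note: the union view is read-only, so every mutating entry
    # point below is deliberately stubbed out.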
    def addrevision(
        self,
        text,
        transaction,
        link,
        p1,
        p2,
        cachedelta=None,
        node=None,
        flags=revlog.REVIDX_DEFAULT_FLAGS,
        deltacomputer=None,
        sidedata=None,
    ):
        raise NotImplementedError

    def addgroup(
        self,
        deltas,
        linkmapper,
        transaction,
        alwayscache=False,
        addrevisioncb=None,
        duplicaterevisioncb=None,
        debug_info=None,
        delta_base_reuse_policy=None,
    ):
        raise NotImplementedError

    def strip(self, minlink, transaction):
        raise NotImplementedError

    def checksize(self):
        raise NotImplementedError


class unionchangelog(unionrevlog, changelog.changelog):
    def __init__(self, opener, opener2):
        changelog.changelog.__init__(self, opener)
        linkmapper = None
        changelog2 = changelog.changelog(opener2)
        unionrevlog.__init__(self, opener, self.radix, changelog2, linkmapper)


class unionmanifest(unionrevlog, manifest.manifestrevlog):
    repotiprev: int
    revlog2: manifest.ManifestRevlog

    def __init__(self, nodeconstants, opener, opener2, linkmapper):
        # XXX manifestrevlog is not actually a revlog, so mixing it with
        # bundlerevlog is not a good idea.
        manifest.manifestrevlog.__init__(self, nodeconstants, opener)
        manifest2 = manifest.manifestrevlog(nodeconstants, opener2)
        unionrevlog.__init__(
            self, opener, self._revlog.radix, manifest2, linkmapper
        )


class unionfilelog(filelog.filelog):
    _revlog: unionrevlog
    repotiprev: int
    revlog2: revlog.revlog

    def __init__(self, opener, path, opener2, linkmapper, repo):
        filelog.filelog.__init__(self, opener, path)
        filelog2 = filelog.filelog(opener2, path)
        self._revlog = unionrevlog(
            opener, self._revlog.radix, filelog2._revlog, linkmapper
        )
        self._repo = repo
        self.repotiprev = self._revlog.repotiprev
        self.revlog2 = self._revlog.revlog2

    def iscensored(self, rev):
        """Check if a revision is censored."""
        if rev <= self.repotiprev:
            return filelog.filelog.iscensored(self, rev)
        node = self.node(rev)
        return self.revlog2.iscensored(self.revlog2.rev(node))


class unionpeer(localrepo.localpeer):
    def canpush(self):
        return False


class unionrepository:
    """Represents the union of data in 2 repositories.

    Instances are not usable if constructed directly. Use ``instance()``
    or ``makeunionrepository()`` to create a usable instance.
    """

    def __init__(self, repo2, url):
        self.repo2 = repo2
        self._url = url

        self.ui.setconfig(b'phases', b'publish', False, b'unionrepo')

    @localrepo.unfilteredpropertycache
    def changelog(self):
        return unionchangelog(self.svfs, self.repo2.svfs)

    @localrepo.unfilteredpropertycache
    def manifestlog(self):
        rootstore = unionmanifest(
            self.nodeconstants,
            self.svfs,
            self.repo2.svfs,
            self.unfiltered()._clrev,
        )
        return manifest.manifestlog(
            self.svfs, self, rootstore, self.narrowmatch()
        )

    def _clrev(self, rev2):
        """map from repo2 changelog rev to temporary rev in self.changelog"""
        node = self.repo2.changelog.node(rev2)
        return self.changelog.rev(node)

    def url(self):
        return self._url

    def file(self, f):
        return unionfilelog(
            self.svfs, f, self.repo2.svfs, self.unfiltered()._clrev, self
        )

    def close(self):
        self.repo2.close()

    def cancopy(self):
        return False

    def peer(self, path=None, remotehidden=False):
        return unionpeer(self, path=None, remotehidden=remotehidden)

    def getcwd(self):
        return encoding.getcwd()  # always outside the repo


def instance(ui, path, create, intents=None, createopts=None):
    if create:
        raise error.Abort(_(b'cannot create new union repository'))
    parentpath = ui.config(b"bundle", b"mainreporoot")
    if not parentpath:
        # try to find the correct path to the working directory repo
        parentpath = cmdutil.findrepo(encoding.getcwd())
        if parentpath is None:
            parentpath = b''
    if parentpath:
        # Try to make the full path relative so we get a nice, short URL.
        # In particular, we don't want temp dir names in test outputs.
        cwd = encoding.getcwd()
        if parentpath == cwd:
            parentpath = b''
        else:
            cwd = pathutil.normasprefix(cwd)
            if parentpath.startswith(cwd):
                parentpath = parentpath[len(cwd) :]
    if path.startswith(b'union:'):
        s = path.split(b":", 1)[1].split(b"+", 1)
        if len(s) == 1:
            repopath, repopath2 = parentpath, s[0]
        else:
            repopath, repopath2 = s
    else:
        repopath, repopath2 = parentpath, path

    return makeunionrepository(ui, repopath, repopath2)
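
# Editorial note (illustrative paths): b'union:../a+../b' splits into
# (b'../a', b'../b'), while b'union:../b' reuses the surrounding repository
# as the first side, i.e. (parentpath, b'../b').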


def makeunionrepository(ui, repopath1, repopath2):
    """Make a union repository object from 2 local repo paths."""
    repo1 = localrepo.instance(ui, repopath1, create=False)
    repo2 = localrepo.instance(ui, repopath2, create=False)

    url = b'union:%s+%s' % (
        util.expandpath(repopath1),
        util.expandpath(repopath2),
    )

    class derivedunionrepository(unionrepository, repo1.__class__):
        pass

    repo = repo1
    repo.__class__ = derivedunionrepository
    unionrepository.__init__(repo1, repo2, url)

    return repo
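
# Editorial note: the __class__ swap above grafts unionrepository onto an
# already-constructed localrepository instance, so repo1 keeps its vfs and
# caches while gaining the union overrides via the derived class's MRO.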