typing: lock in new pytype gains from making revlog related classes typeable...
Matt Harbison
r52719:0338fb20 default
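
The diffs that follow lock in the new type information: FileLog gains class-level attribute annotations (_revlog: revlog.revlog, nullid: bytes) and typed returns such as get_revlog() -> revlog.revlog, and remotefilelog's emitrevisions() is declared to yield revlog.RevLogRevisionDelta. A minimal sketch of the annotation pattern pytype can now check is shown below; ExampleLog and its method bodies are hypothetical and only mirror the style of the changeset, they are not part of it.

from __future__ import annotations

from typing import Iterator

from mercurial import revlog  # assumes a Mercurial checkout/install is importable


class ExampleLog:
    """Hypothetical wrapper mirroring FileLog's new annotations."""

    _revlog: revlog.revlog  # declared attribute type, so pytype tracks it
    nullid: bytes

    def __init__(self, rl: revlog.revlog) -> None:
        self._revlog = rl
        self.nullid = rl.nullid

    def get_revlog(self) -> revlog.revlog:
        # With the return type pinned down, pytype can verify what callers
        # do with the result (e.g. calling .deltaparent(rev) on it).
        return self._revlog

    def __len__(self) -> int:
        return len(self._revlog)

    def __iter__(self) -> Iterator[int]:
        # Revisions are plain integers; the annotation keeps that visible.
        return iter(self._revlog)
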
@@ -1,480 +1,480 b''
1 1 # debugcommands.py - debug logic for remotefilelog
2 2 #
3 3 # Copyright 2013 Facebook, Inc.
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 import os
9 9 import zlib
10 10
11 11 from mercurial.node import (
12 12 bin,
13 13 hex,
14 14 sha1nodeconstants,
15 15 short,
16 16 )
17 17 from mercurial.i18n import _
18 18 from mercurial.pycompat import open
19 19 from mercurial import (
20 20 error,
21 21 filelog,
22 22 lock as lockmod,
23 23 pycompat,
24 24 revlog,
25 25 )
26 26 from mercurial.utils import hashutil
27 27 from . import (
28 28 constants,
29 29 datapack,
30 30 fileserverclient,
31 31 historypack,
32 32 repack,
33 33 shallowutil,
34 34 )
35 35
36 36
37 37 def debugremotefilelog(ui, path, **opts) -> None:
38 38 decompress = opts.get('decompress')
39 39
40 40 size, firstnode, mapping = parsefileblob(path, decompress)
41 41
42 42 ui.status(_(b"size: %d bytes\n") % size)
43 43 ui.status(_(b"path: %s \n") % path)
44 44 ui.status(_(b"key: %s \n") % (short(firstnode)))
45 45 ui.status(_(b"\n"))
46 46 ui.status(
47 47 _(b"%12s => %12s %13s %13s %12s\n")
48 48 % (b"node", b"p1", b"p2", b"linknode", b"copyfrom")
49 49 )
50 50
51 51 queue = [firstnode]
52 52 while queue:
53 53 node = queue.pop(0)
54 54 p1, p2, linknode, copyfrom = mapping[node]
55 55 ui.status(
56 56 _(b"%s => %s %s %s %s\n")
57 57 % (short(node), short(p1), short(p2), short(linknode), copyfrom)
58 58 )
59 59 if p1 != sha1nodeconstants.nullid:
60 60 queue.append(p1)
61 61 if p2 != sha1nodeconstants.nullid:
62 62 queue.append(p2)
63 63
64 64
65 def buildtemprevlog(repo, file):
65 def buildtemprevlog(repo, file) -> filelog.FileLog:
66 66 # get filename key
67 67 filekey = hex(hashutil.sha1(file).digest())
68 68 filedir = os.path.join(repo.path, b'store/data', filekey)
69 69
70 70 # sort all entries based on linkrev
71 71 fctxs = []
72 72 for filenode in os.listdir(filedir):
73 73 if b'_old' not in filenode:
74 74 fctxs.append(repo.filectx(file, fileid=bin(filenode)))
75 75
76 76 fctxs = sorted(fctxs, key=lambda x: x.linkrev())
77 77
78 78 # add to revlog
79 79 temppath = repo.sjoin(b'data/temprevlog.i')
80 80 if os.path.exists(temppath):
81 81 os.remove(temppath)
82 82 r = filelog.filelog(repo.svfs, b'temprevlog')
83 83
84 84 class faket:
85 85 def add(self, a, b, c):
86 86 pass
87 87
88 88 t = faket()
89 89 for fctx in fctxs:
90 90 if fctx.node() not in repo:
91 91 continue
92 92
93 93 p = fctx.filelog().parents(fctx.filenode())
94 94 meta = {}
95 95 if fctx.renamed():
96 96 meta[b'copy'] = fctx.renamed()[0]
97 97 meta[b'copyrev'] = hex(fctx.renamed()[1])
98 98
99 99 r.add(fctx.data(), meta, t, fctx.linkrev(), p[0], p[1])
100 100
101 101 return r
102 102
103 103
104 104 def debugindex(orig, ui, repo, file_=None, **opts):
105 105 """dump the contents of an index file"""
106 106 if (
107 107 opts.get('changelog')
108 108 or opts.get('manifest')
109 109 or opts.get('dir')
110 110 or not shallowutil.isenabled(repo)
111 111 or not repo.shallowmatch(file_)
112 112 ):
113 113 return orig(ui, repo, file_, **opts)
114 114
115 115 r = buildtemprevlog(repo, file_)
116 116
117 117 # debugindex like normal
118 118 format = opts.get('format', 0)
119 119 if format not in (0, 1):
120 120 raise error.Abort(_(b"unknown format %d") % format)
121 121
122 122 generaldelta = r.get_revlog()._format_flags & revlog.FLAG_GENERALDELTA
123 123 if generaldelta:
124 124 basehdr = b' delta'
125 125 else:
126 126 basehdr = b' base'
127 127
128 128 if format == 0:
129 129 ui.write(
130 130 (
131 131 b" rev offset length " + basehdr + b" linkrev"
132 132 b" nodeid p1 p2\n"
133 133 )
134 134 )
135 135 elif format == 1:
136 136 ui.write(
137 137 (
138 138 b" rev flag offset length"
139 139 b" size " + basehdr + b" link p1 p2"
140 140 b" nodeid\n"
141 141 )
142 142 )
143 143
144 144 for i in r:
145 145 node = r.node(i)
146 146 if generaldelta:
147 147 base = r.get_revlog().deltaparent(i)
148 148 else:
149 149 base = r.get_revlog().chainbase(i)
150 150 if format == 0:
151 151 try:
152 152 pp = r.parents(node)
153 153 except Exception:
154 154 pp = [repo.nullid, repo.nullid]
155 155 ui.write(
156 156 b"% 6d % 9d % 7d % 6d % 7d %s %s %s\n"
157 157 % (
158 158 i,
159 159 r.get_revlog().start(i),
160 160 r.get_revlog().length(i),
161 161 base,
162 162 r.linkrev(i),
163 163 short(node),
164 164 short(pp[0]),
165 165 short(pp[1]),
166 166 )
167 167 )
168 168 elif format == 1:
169 169 pr = r.parentrevs(i)
170 170 ui.write(
171 171 b"% 6d %04x % 8d % 8d % 8d % 6d % 6d % 6d % 6d %s\n"
172 172 % (
173 173 i,
174 174 r.get_revlog().flags(i),
175 175 r.get_revlog().start(i),
176 176 r.get_revlog().length(i),
177 177 r.get_revlog().rawsize(i),
178 178 base,
179 179 r.linkrev(i),
180 180 pr[0],
181 181 pr[1],
182 182 short(node),
183 183 )
184 184 )
185 185
186 186
187 187 def debugindexdot(orig, ui, repo, file_):
188 188 """dump an index DAG as a graphviz dot file"""
189 189 if not shallowutil.isenabled(repo):
190 190 return orig(ui, repo, file_)
191 191
192 192 r = buildtemprevlog(repo, os.path.basename(file_)[:-2])
193 193
194 194 ui.writenoi18n(b"digraph G {\n")
195 195 for i in r:
196 196 node = r.node(i)
197 197 pp = r.parents(node)
198 198 ui.write(b"\t%d -> %d\n" % (r.rev(pp[0]), i))
199 199 if pp[1] != repo.nullid:
200 200 ui.write(b"\t%d -> %d\n" % (r.rev(pp[1]), i))
201 201 ui.write(b"}\n")
202 202
203 203
204 204 def verifyremotefilelog(ui, path, **opts):
205 205 decompress = opts.get('decompress')
206 206
207 207 for root, dirs, files in os.walk(path):
208 208 for file in files:
209 209 if file == b"repos":
210 210 continue
211 211 filepath = os.path.join(root, file)
212 212 size, firstnode, mapping = parsefileblob(filepath, decompress)
213 213 for p1, p2, linknode, copyfrom in mapping.values():
214 214 if linknode == sha1nodeconstants.nullid:
215 215 actualpath = os.path.relpath(root, path)
216 216 key = fileserverclient.getcachekey(
217 217 b"reponame", actualpath, file
218 218 )
219 219 ui.status(
220 220 b"%s %s\n" % (key, os.path.relpath(filepath, path))
221 221 )
222 222
223 223
224 224 def _decompressblob(raw):
225 225 return zlib.decompress(raw)
226 226
227 227
228 228 def parsefileblob(path, decompress):
229 229 f = open(path, b"rb")
230 230 try:
231 231 raw = f.read()
232 232 finally:
233 233 f.close()
234 234
235 235 if decompress:
236 236 raw = _decompressblob(raw)
237 237
238 238 offset, size, flags = shallowutil.parsesizeflags(raw)
239 239 start = offset + size
240 240
241 241 firstnode = None
242 242
243 243 mapping = {}
244 244 while start < len(raw):
245 245 divider = raw.index(b'\0', start + 80)
246 246
247 247 currentnode = raw[start : (start + 20)]
248 248 if not firstnode:
249 249 firstnode = currentnode
250 250
251 251 p1 = raw[(start + 20) : (start + 40)]
252 252 p2 = raw[(start + 40) : (start + 60)]
253 253 linknode = raw[(start + 60) : (start + 80)]
254 254 copyfrom = raw[(start + 80) : divider]
255 255
256 256 mapping[currentnode] = (p1, p2, linknode, copyfrom)
257 257 start = divider + 1
258 258
259 259 return size, firstnode, mapping
260 260
261 261
262 262 def debugdatapack(ui, *paths, **opts):
263 263 for path in paths:
264 264 if b'.data' in path:
265 265 path = path[: path.index(b'.data')]
266 266 ui.write(b"%s:\n" % path)
267 267 dpack = datapack.datapack(path)
268 268 node = opts.get('node')
269 269 if node:
270 270 deltachain = dpack.getdeltachain(b'', bin(node))
271 271 dumpdeltachain(ui, deltachain, **opts)
272 272 return
273 273
274 274 if opts.get('long'):
275 275 hashformatter = hex
276 276 hashlen = 42
277 277 else:
278 278 hashformatter = short
279 279 hashlen = 14
280 280
281 281 lastfilename = None
282 282 totaldeltasize = 0
283 283 totalblobsize = 0
284 284
285 285 def printtotals():
286 286 if lastfilename is not None:
287 287 ui.write(b"\n")
288 288 if not totaldeltasize or not totalblobsize:
289 289 return
290 290 difference = totalblobsize - totaldeltasize
291 291 deltastr = b"%0.1f%% %s" % (
292 292 (100.0 * abs(difference) / totalblobsize),
293 293 (b"smaller" if difference > 0 else b"bigger"),
294 294 )
295 295
296 296 ui.writenoi18n(
297 297 b"Total:%s%s %s (%s)\n"
298 298 % (
299 299 b"".ljust(2 * hashlen - len(b"Total:")),
300 300 (b'%d' % totaldeltasize).ljust(12),
301 301 (b'%d' % totalblobsize).ljust(9),
302 302 deltastr,
303 303 )
304 304 )
305 305
306 306 bases = {}
307 307 nodes = set()
308 308 failures = 0
309 309 for filename, node, deltabase, deltalen in dpack.iterentries():
310 310 bases[node] = deltabase
311 311 if node in nodes:
312 312 ui.write((b"Bad entry: %s appears twice\n" % short(node)))
313 313 failures += 1
314 314 nodes.add(node)
315 315 if filename != lastfilename:
316 316 printtotals()
317 317 name = b'(empty name)' if filename == b'' else filename
318 318 ui.write(b"%s:\n" % name)
319 319 ui.write(
320 320 b"%s%s%s%s\n"
321 321 % (
322 322 b"Node".ljust(hashlen),
323 323 b"Delta Base".ljust(hashlen),
324 324 b"Delta Length".ljust(14),
325 325 b"Blob Size".ljust(9),
326 326 )
327 327 )
328 328 lastfilename = filename
329 329 totalblobsize = 0
330 330 totaldeltasize = 0
331 331
332 332 # Metadata could be missing, in which case it will be an empty dict.
333 333 meta = dpack.getmeta(filename, node)
334 334 if constants.METAKEYSIZE in meta:
335 335 blobsize = meta[constants.METAKEYSIZE]
336 336 totaldeltasize += deltalen
337 337 totalblobsize += blobsize
338 338 else:
339 339 blobsize = b"(missing)"
340 340 ui.write(
341 341 b"%s %s %s%s\n"
342 342 % (
343 343 hashformatter(node),
344 344 hashformatter(deltabase),
345 345 (b'%d' % deltalen).ljust(14),
346 346 pycompat.bytestr(blobsize),
347 347 )
348 348 )
349 349
350 350 if filename is not None:
351 351 printtotals()
352 352
353 353 failures += _sanitycheck(ui, set(nodes), bases)
354 354 if failures > 1:
355 355 ui.warn((b"%d failures\n" % failures))
356 356 return 1
357 357
358 358
359 359 def _sanitycheck(ui, nodes, bases):
360 360 """
361 361 Does some basic sanity checking on a packfiles with ``nodes`` ``bases`` (a
362 362 mapping of node->base):
363 363
364 364 - Each deltabase must itself be a node elsewhere in the pack
365 365 - There must be no cycles
366 366 """
367 367 failures = 0
368 368 for node in nodes:
369 369 seen = set()
370 370 current = node
371 371 deltabase = bases[current]
372 372
373 373 while deltabase != sha1nodeconstants.nullid:
374 374 if deltabase not in nodes:
375 375 ui.warn(
376 376 (
377 377 b"Bad entry: %s has an unknown deltabase (%s)\n"
378 378 % (short(node), short(deltabase))
379 379 )
380 380 )
381 381 failures += 1
382 382 break
383 383
384 384 if deltabase in seen:
385 385 ui.warn(
386 386 (
387 387 b"Bad entry: %s has a cycle (at %s)\n"
388 388 % (short(node), short(deltabase))
389 389 )
390 390 )
391 391 failures += 1
392 392 break
393 393
394 394 current = deltabase
395 395 seen.add(current)
396 396 deltabase = bases[current]
397 397 # Since ``node`` begins a valid chain, reset/memoize its base to nullid
398 398 # so we don't traverse it again.
399 399 bases[node] = sha1nodeconstants.nullid
400 400 return failures
401 401
402 402
403 403 def dumpdeltachain(ui, deltachain, **opts):
404 404 hashformatter = hex
405 405 hashlen = 40
406 406
407 407 lastfilename = None
408 408 for filename, node, filename, deltabasenode, delta in deltachain:
409 409 if filename != lastfilename:
410 410 ui.write(b"\n%s\n" % filename)
411 411 lastfilename = filename
412 412 ui.write(
413 413 b"%s %s %s %s\n"
414 414 % (
415 415 b"Node".ljust(hashlen),
416 416 b"Delta Base".ljust(hashlen),
417 417 b"Delta SHA1".ljust(hashlen),
418 418 b"Delta Length".ljust(6),
419 419 )
420 420 )
421 421
422 422 ui.write(
423 423 b"%s %s %s %d\n"
424 424 % (
425 425 hashformatter(node),
426 426 hashformatter(deltabasenode),
427 427 hex(hashutil.sha1(delta).digest()),
428 428 len(delta),
429 429 )
430 430 )
431 431
432 432
433 433 def debughistorypack(ui, path):
434 434 if b'.hist' in path:
435 435 path = path[: path.index(b'.hist')]
436 436 hpack = historypack.historypack(path)
437 437
438 438 lastfilename = None
439 439 for entry in hpack.iterentries():
440 440 filename, node, p1node, p2node, linknode, copyfrom = entry
441 441 if filename != lastfilename:
442 442 ui.write(b"\n%s\n" % filename)
443 443 ui.write(
444 444 b"%s%s%s%s%s\n"
445 445 % (
446 446 b"Node".ljust(14),
447 447 b"P1 Node".ljust(14),
448 448 b"P2 Node".ljust(14),
449 449 b"Link Node".ljust(14),
450 450 b"Copy From",
451 451 )
452 452 )
453 453 lastfilename = filename
454 454 ui.write(
455 455 b"%s %s %s %s %s\n"
456 456 % (
457 457 short(node),
458 458 short(p1node),
459 459 short(p2node),
460 460 short(linknode),
461 461 copyfrom,
462 462 )
463 463 )
464 464
465 465
466 466 def debugwaitonrepack(repo):
467 467 with lockmod.lock(repack.repacklockvfs(repo), b"repacklock", timeout=-1):
468 468 return
469 469
470 470
471 471 def debugwaitonprefetch(repo):
472 472 with repo._lock(
473 473 repo.svfs,
474 474 b"prefetchlock",
475 475 True,
476 476 None,
477 477 None,
478 478 _(b'prefetching in %s') % repo.origroot,
479 479 ):
480 480 pass
@@ -1,472 +1,476 b''
1 1 # remotefilelog.py - filelog implementation where filelog history is stored
2 2 # remotely
3 3 #
4 4 # Copyright 2013 Facebook, Inc.
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8
9 9 import collections
10 10
11 from typing import (
12 Iterator,
13 )
14
11 15 from mercurial.node import bin
12 16 from mercurial.i18n import _
13 17 from mercurial import (
14 18 ancestor,
15 19 error,
16 20 mdiff,
17 21 revlog,
18 22 )
19 23 from mercurial.utils import storageutil
20 24 from mercurial.revlogutils import flagutil
21 25
22 26 from . import (
23 27 constants,
24 28 shallowutil,
25 29 )
26 30
27 31
28 32 class remotefilelognodemap:
29 33 def __init__(self, filename, store):
30 34 self._filename = filename
31 35 self._store = store
32 36
33 37 def __contains__(self, node):
34 38 missing = self._store.getmissing([(self._filename, node)])
35 39 return not bool(missing)
36 40
37 41 def __get__(self, node):
38 42 if node not in self:
39 43 raise KeyError(node)
40 44 return node
41 45
42 46
43 47 class remotefilelog:
44 48 _flagserrorclass = error.RevlogError
45 49
46 50 def __init__(self, opener, path, repo):
47 51 self.opener = opener
48 52 self.filename = path
49 53 self.repo = repo
50 54 self.nodemap = remotefilelognodemap(self.filename, repo.contentstore)
51 55
52 56 self.version = 1
53 57
54 58 self._flagprocessors = dict(flagutil.flagprocessors)
55 59
56 60 def read(self, node):
57 61 """returns the file contents at this node"""
58 62 t = self.revision(node)
59 63 if not t.startswith(b'\1\n'):
60 64 return t
61 65 s = t.index(b'\1\n', 2)
62 66 return t[s + 2 :]
63 67
64 68 def add(self, text, meta, transaction, linknode, p1=None, p2=None):
65 69 # hash with the metadata, like in vanilla filelogs
66 70 hashtext = shallowutil.createrevlogtext(
67 71 text, meta.get(b'copy'), meta.get(b'copyrev')
68 72 )
69 73 node = storageutil.hashrevisionsha1(hashtext, p1, p2)
70 74 return self.addrevision(
71 75 hashtext, transaction, linknode, p1, p2, node=node
72 76 )
73 77
74 78 def _createfileblob(self, text, meta, flags, p1, p2, node, linknode):
75 79 # text passed to "_createfileblob" does not include filelog metadata
76 80 header = shallowutil.buildfileblobheader(len(text), flags)
77 81 data = b"%s\0%s" % (header, text)
78 82
79 83 realp1 = p1
80 84 copyfrom = b""
81 85 if meta and b'copy' in meta:
82 86 copyfrom = meta[b'copy']
83 87 realp1 = bin(meta[b'copyrev'])
84 88
85 89 data += b"%s%s%s%s%s\0" % (node, realp1, p2, linknode, copyfrom)
86 90
87 91 visited = set()
88 92
89 93 pancestors = {}
90 94 queue = []
91 95 if realp1 != self.repo.nullid:
92 96 p1flog = self
93 97 if copyfrom:
94 98 p1flog = remotefilelog(self.opener, copyfrom, self.repo)
95 99
96 100 pancestors.update(p1flog.ancestormap(realp1))
97 101 queue.append(realp1)
98 102 visited.add(realp1)
99 103 if p2 != self.repo.nullid:
100 104 pancestors.update(self.ancestormap(p2))
101 105 queue.append(p2)
102 106 visited.add(p2)
103 107
104 108 ancestortext = b""
105 109
106 110 # add the ancestors in topological order
107 111 while queue:
108 112 c = queue.pop(0)
109 113 pa1, pa2, ancestorlinknode, pacopyfrom = pancestors[c]
110 114
111 115 pacopyfrom = pacopyfrom or b''
112 116 ancestortext += b"%s%s%s%s%s\0" % (
113 117 c,
114 118 pa1,
115 119 pa2,
116 120 ancestorlinknode,
117 121 pacopyfrom,
118 122 )
119 123
120 124 if pa1 != self.repo.nullid and pa1 not in visited:
121 125 queue.append(pa1)
122 126 visited.add(pa1)
123 127 if pa2 != self.repo.nullid and pa2 not in visited:
124 128 queue.append(pa2)
125 129 visited.add(pa2)
126 130
127 131 data += ancestortext
128 132
129 133 return data
130 134
131 135 def addrevision(
132 136 self,
133 137 text,
134 138 transaction,
135 139 linknode,
136 140 p1,
137 141 p2,
138 142 cachedelta=None,
139 143 node=None,
140 144 flags=revlog.REVIDX_DEFAULT_FLAGS,
141 145 sidedata=None,
142 146 ):
143 147 # text passed to "addrevision" includes hg filelog metadata header
144 148 if node is None:
145 149 node = storageutil.hashrevisionsha1(text, p1, p2)
146 150
147 151 meta, metaoffset = storageutil.parsemeta(text)
148 152 rawtext, validatehash = flagutil.processflagswrite(
149 153 self,
150 154 text,
151 155 flags,
152 156 )
153 157 return self.addrawrevision(
154 158 rawtext,
155 159 transaction,
156 160 linknode,
157 161 p1,
158 162 p2,
159 163 node,
160 164 flags,
161 165 cachedelta,
162 166 _metatuple=(meta, metaoffset),
163 167 )
164 168
165 169 def addrawrevision(
166 170 self,
167 171 rawtext,
168 172 transaction,
169 173 linknode,
170 174 p1,
171 175 p2,
172 176 node,
173 177 flags,
174 178 cachedelta=None,
175 179 _metatuple=None,
176 180 ):
177 181 if _metatuple:
178 182 # _metatuple: used by "addrevision" internally by remotefilelog
179 183 # meta was parsed confidently
180 184 meta, metaoffset = _metatuple
181 185 else:
182 186 # not from self.addrevision, but something else (repo._filecommit)
183 187 # calls addrawrevision directly. remotefilelog needs to get and
184 188 # strip filelog metadata.
185 189 # we don't have confidence about whether rawtext contains filelog
186 190 # metadata or not (flag processor could replace it), so we just
187 191 # parse it as best-effort.
188 192 # in LFS (flags != 0)'s case, the best way is to call LFS code to
189 193 # get the meta information, instead of storageutil.parsemeta.
190 194 meta, metaoffset = storageutil.parsemeta(rawtext)
191 195 if flags != 0:
192 196 # when flags != 0, be conservative and do not mangle rawtext, since
193 197 # a read flag processor expects the text not being mangled at all.
194 198 metaoffset = 0
195 199 if metaoffset:
196 200 # remotefilelog fileblob stores copy metadata in its ancestortext,
197 201 # not its main blob. so we need to remove filelog metadata
198 202 # (containing copy information) from text.
199 203 blobtext = rawtext[metaoffset:]
200 204 else:
201 205 blobtext = rawtext
202 206 data = self._createfileblob(
203 207 blobtext, meta, flags, p1, p2, node, linknode
204 208 )
205 209 self.repo.contentstore.addremotefilelognode(self.filename, node, data)
206 210
207 211 return node
208 212
209 213 def renamed(self, node):
210 214 ancestors = self.repo.metadatastore.getancestors(self.filename, node)
211 215 p1, p2, linknode, copyfrom = ancestors[node]
212 216 if copyfrom:
213 217 return (copyfrom, p1)
214 218
215 219 return False
216 220
217 221 def size(self, node):
218 222 """return the size of a given revision"""
219 223 return len(self.read(node))
220 224
221 225 rawsize = size
222 226
223 227 def cmp(self, node, text):
224 228 """compare text with a given file revision
225 229
226 230 returns True if text is different than what is stored.
227 231 """
228 232
229 233 if node == self.repo.nullid:
230 234 return True
231 235
232 236 nodetext = self.read(node)
233 237 return nodetext != text
234 238
235 239 def __nonzero__(self):
236 240 return True
237 241
238 242 __bool__ = __nonzero__
239 243
240 244 def __len__(self):
241 245 if self.filename in (b'.hgtags', b'.hgsub', b'.hgsubstate'):
242 246 # Global tag and subrepository support require access to the
243 247 # file history for various performance sensitive operations.
244 248 # excludepattern should be used for repositories depending on
245 249 # those features to fallback to regular filelog.
246 250 return 0
247 251
248 252 raise RuntimeError(b"len not supported")
249 253
250 254 def heads(self):
251 255 # Fake heads of the filelog to satisfy hgweb.
252 256 return []
253 257
254 258 def empty(self):
255 259 return False
256 260
257 261 def flags(self, node):
258 262 if isinstance(node, int):
259 263 raise error.ProgrammingError(
260 264 b'remotefilelog does not accept integer rev for flags'
261 265 )
262 266 store = self.repo.contentstore
263 267 return store.getmeta(self.filename, node).get(constants.METAKEYFLAG, 0)
264 268
265 269 def parents(self, node):
266 270 if node == self.repo.nullid:
267 271 return self.repo.nullid, self.repo.nullid
268 272
269 273 ancestormap = self.repo.metadatastore.getancestors(self.filename, node)
270 274 p1, p2, linknode, copyfrom = ancestormap[node]
271 275 if copyfrom:
272 276 p1 = self.repo.nullid
273 277
274 278 return p1, p2
275 279
276 280 def parentrevs(self, rev):
277 281 # TODO(augie): this is a node and should be a rev, but for now
278 282 # nothing in core seems to actually break.
279 283 return self.parents(rev)
280 284
281 285 def linknode(self, node):
282 286 ancestormap = self.repo.metadatastore.getancestors(self.filename, node)
283 287 p1, p2, linknode, copyfrom = ancestormap[node]
284 288 return linknode
285 289
286 290 def linkrev(self, node):
287 291 return self.repo.unfiltered().changelog.rev(self.linknode(node))
288 292
289 293 def emitrevisions(
290 294 self,
291 295 nodes,
292 296 nodesorder=None,
293 297 revisiondata=False,
294 298 assumehaveparentrevisions=False,
295 299 deltaprevious=False,
296 300 deltamode=None,
297 301 sidedata_helpers=None,
298 302 debug_info=None,
299 ):
303 ) -> Iterator[revlog.RevLogRevisionDelta]:
300 304 # we don't use any of these parameters here
301 305 del nodesorder, revisiondata, assumehaveparentrevisions, deltaprevious
302 306 del deltamode
303 307 prevnode = None
304 308 for node in nodes:
305 309 p1, p2 = self.parents(node)
306 310 if prevnode is None:
307 311 basenode = prevnode = p1
308 312 if basenode == node:
309 313 basenode = self.repo.nullid
310 314 if basenode != self.repo.nullid:
311 315 revision = None
312 316 delta = self.revdiff(basenode, node)
313 317 else:
314 318 revision = self.rawdata(node)
315 319 delta = None
316 320 yield revlog.revlogrevisiondelta(
317 321 node=node,
318 322 p1node=p1,
319 323 p2node=p2,
320 324 linknode=self.linknode(node),
321 325 basenode=basenode,
322 326 flags=self.flags(node),
323 327 baserevisionsize=None,
324 328 revision=revision,
325 329 delta=delta,
326 330 # Sidedata is not supported yet
327 331 sidedata=None,
328 332 # Protocol flags are not used yet
329 333 protocol_flags=0,
330 334 )
331 335
332 336 def revdiff(self, node1, node2):
333 337 return mdiff.textdiff(self.rawdata(node1), self.rawdata(node2))
334 338
335 339 def lookup(self, node):
336 340 if len(node) == 40:
337 341 node = bin(node)
338 342 if len(node) != 20:
339 343 raise error.LookupError(
340 344 node, self.filename, _(b'invalid lookup input')
341 345 )
342 346
343 347 return node
344 348
345 349 def rev(self, node):
346 350 # This is a hack to make TortoiseHG work.
347 351 return node
348 352
349 353 def node(self, rev):
350 354 # This is a hack.
351 355 if isinstance(rev, int):
352 356 raise error.ProgrammingError(
353 357 b'remotefilelog does not convert integer rev to node'
354 358 )
355 359 return rev
356 360
357 361 def revision(self, node, raw=False):
358 362 """returns the revlog contents at this node.
359 363 this includes the meta data traditionally included in file revlogs.
360 364 this is generally only used for bundling and communicating with vanilla
361 365 hg clients.
362 366 """
363 367 if node == self.repo.nullid:
364 368 return b""
365 369 if len(node) != 20:
366 370 raise error.LookupError(
367 371 node, self.filename, _(b'invalid revision input')
368 372 )
369 373 if (
370 374 node == self.repo.nodeconstants.wdirid
371 375 or node in self.repo.nodeconstants.wdirfilenodeids
372 376 ):
373 377 raise error.WdirUnsupported
374 378
375 379 store = self.repo.contentstore
376 380 rawtext = store.get(self.filename, node)
377 381 if raw:
378 382 return rawtext
379 383 flags = store.getmeta(self.filename, node).get(constants.METAKEYFLAG, 0)
380 384 if flags == 0:
381 385 return rawtext
382 386 return flagutil.processflagsread(self, rawtext, flags)[0]
383 387
384 388 def rawdata(self, node):
385 389 return self.revision(node, raw=False)
386 390
387 391 def ancestormap(self, node):
388 392 return self.repo.metadatastore.getancestors(self.filename, node)
389 393
390 394 def ancestor(self, a, b):
391 395 if a == self.repo.nullid or b == self.repo.nullid:
392 396 return self.repo.nullid
393 397
394 398 revmap, parentfunc = self._buildrevgraph(a, b)
395 399 nodemap = {v: k for (k, v) in revmap.items()}
396 400
397 401 ancs = ancestor.ancestors(parentfunc, revmap[a], revmap[b])
398 402 if ancs:
399 403 # choose a consistent winner when there's a tie
400 404 return min(map(nodemap.__getitem__, ancs))
401 405 return self.repo.nullid
402 406
403 407 def commonancestorsheads(self, a, b):
404 408 """calculate all the heads of the common ancestors of nodes a and b"""
405 409
406 410 if a == self.repo.nullid or b == self.repo.nullid:
407 411 return self.repo.nullid
408 412
409 413 revmap, parentfunc = self._buildrevgraph(a, b)
410 414 nodemap = {v: k for (k, v) in revmap.items()}
411 415
412 416 ancs = ancestor.commonancestorsheads(parentfunc, revmap[a], revmap[b])
413 417 return map(nodemap.__getitem__, ancs)
414 418
415 419 def _buildrevgraph(self, a, b):
416 420 """Builds a numeric revision graph for the given two nodes.
417 421 Returns a node->rev map and a rev->[revs] parent function.
418 422 """
419 423 amap = self.ancestormap(a)
420 424 bmap = self.ancestormap(b)
421 425
422 426 # Union the two maps
423 427 parentsmap = collections.defaultdict(list)
424 428 allparents = set()
425 429 for mapping in (amap, bmap):
426 430 for node, pdata in mapping.items():
427 431 parents = parentsmap[node]
428 432 p1, p2, linknode, copyfrom = pdata
429 433 # Don't follow renames (copyfrom).
430 434 # remotefilectx.ancestor does that.
431 435 if p1 != self.repo.nullid and not copyfrom:
432 436 parents.append(p1)
433 437 allparents.add(p1)
434 438 if p2 != self.repo.nullid:
435 439 parents.append(p2)
436 440 allparents.add(p2)
437 441
438 442 # Breadth first traversal to build linkrev graph
439 443 parentrevs = collections.defaultdict(list)
440 444 revmap = {}
441 445 queue = collections.deque(
442 446 ((None, n) for n in parentsmap if n not in allparents)
443 447 )
444 448 while queue:
445 449 prevrev, current = queue.pop()
446 450 if current in revmap:
447 451 if prevrev:
448 452 parentrevs[prevrev].append(revmap[current])
449 453 continue
450 454
451 455 # Assign linkrevs in reverse order, so start at
452 456 # len(parentsmap) and work backwards.
453 457 currentrev = len(parentsmap) - len(revmap) - 1
454 458 revmap[current] = currentrev
455 459
456 460 if prevrev:
457 461 parentrevs[prevrev].append(currentrev)
458 462
459 463 for parent in parentsmap.get(current):
460 464 queue.appendleft((currentrev, parent))
461 465
462 466 return revmap, parentrevs.__getitem__
463 467
464 468 def strip(self, minlink, transaction):
465 469 pass
466 470
467 471 # misc unused things
468 472 def files(self):
469 473 return []
470 474
471 475 def checksize(self):
472 476 return 0, 0
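
With emitrevisions() above annotated as returning Iterator[revlog.RevLogRevisionDelta], typed consumers can be checked end to end. A small hypothetical consumer, sketched on the assumption that the delta objects expose the revision field used in the generator above (count_full_revisions is illustrative, not part of the changeset):

from __future__ import annotations

from typing import Iterator

from mercurial import revlog


def count_full_revisions(deltas: Iterator[revlog.RevLogRevisionDelta]) -> int:
    # Each entry carries either a full revision text or a delta against its
    # basenode; the typed iterator lets pytype confirm the attribute access.
    return sum(1 for d in deltas if d.revision is not None)
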
@@ -1,319 +1,328 b''
1 1 # filelog.py - file history class for mercurial
2 2 #
3 3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8
9 9 import typing
10 10
11 from typing import (
12 Iterable,
13 Iterator,
14 )
15
11 16 from .i18n import _
12 17 from .node import nullrev
13 18 from . import (
14 19 error,
15 20 revlog,
16 21 )
17 22 from .interfaces import (
18 23 repository,
19 24 util as interfaceutil,
20 25 )
21 26 from .utils import storageutil
22 27 from .revlogutils import (
23 28 constants as revlog_constants,
24 29 rewrite,
25 30 )
26 31
27 32
28 33 class FileLog:
34 _revlog: revlog.revlog
35 nullid: bytes
36 _fix_issue6528: bool
37
29 38 def __init__(self, opener, path, try_split=False):
30 39 self._revlog = revlog.revlog(
31 40 opener,
32 41 # XXX should use the unencoded path
33 42 target=(revlog_constants.KIND_FILELOG, path),
34 43 radix=b'/'.join((b'data', path)),
35 44 censorable=True,
36 45 canonical_parent_order=False, # see comment in revlog.py
37 46 try_split=try_split,
38 47 )
39 48 # Full name of the user visible file, relative to the repository root.
40 49 # Used by LFS.
41 50 self._revlog.filename = path
42 51 self.nullid = self._revlog.nullid
43 52 opts = opener.options
44 53 self._fix_issue6528 = opts.get(b'issue6528.fix-incoming', True)
45 54
46 def get_revlog(self):
55 def get_revlog(self) -> revlog.revlog:
47 56 """return an actual revlog instance if any
48 57
49 58 This exist because a lot of code leverage the fact the underlying
50 59 storage is a revlog for optimization, so giving simple way to access
51 60 the revlog instance helps such code.
52 61 """
53 62 return self._revlog
54 63
55 def __len__(self):
64 def __len__(self) -> int:
56 65 return len(self._revlog)
57 66
58 def __iter__(self):
67 def __iter__(self) -> Iterator[int]:
59 68 return self._revlog.__iter__()
60 69
61 70 def hasnode(self, node):
62 71 if node in (self.nullid, nullrev):
63 72 return False
64 73
65 74 try:
66 75 self._revlog.rev(node)
67 76 return True
68 77 except (TypeError, ValueError, IndexError, error.LookupError):
69 78 return False
70 79
71 80 def revs(self, start=0, stop=None):
72 81 return self._revlog.revs(start=start, stop=stop)
73 82
74 83 def parents(self, node):
75 84 return self._revlog.parents(node)
76 85
77 86 def parentrevs(self, rev):
78 87 return self._revlog.parentrevs(rev)
79 88
80 89 def rev(self, node):
81 90 return self._revlog.rev(node)
82 91
83 92 def node(self, rev):
84 93 return self._revlog.node(rev)
85 94
86 95 def lookup(self, node):
87 96 return storageutil.fileidlookup(
88 97 self._revlog, node, self._revlog.display_id
89 98 )
90 99
91 100 def linkrev(self, rev):
92 101 return self._revlog.linkrev(rev)
93 102
94 103 def commonancestorsheads(self, node1, node2):
95 104 return self._revlog.commonancestorsheads(node1, node2)
96 105
97 106 # Used by dagop.blockdescendants().
98 107 def descendants(self, revs):
99 108 return self._revlog.descendants(revs)
100 109
101 110 def heads(self, start=None, stop=None):
102 111 return self._revlog.heads(start, stop)
103 112
104 113 # Used by hgweb, children extension.
105 114 def children(self, node):
106 115 return self._revlog.children(node)
107 116
108 117 def iscensored(self, rev):
109 118 return self._revlog.iscensored(rev)
110 119
111 120 def revision(self, node):
112 121 return self._revlog.revision(node)
113 122
114 123 def rawdata(self, node):
115 124 return self._revlog.rawdata(node)
116 125
117 126 def emitrevisions(
118 127 self,
119 128 nodes,
120 129 nodesorder=None,
121 130 revisiondata=False,
122 131 assumehaveparentrevisions=False,
123 132 deltamode=repository.CG_DELTAMODE_STD,
124 133 sidedata_helpers=None,
125 134 debug_info=None,
126 135 ):
127 136 return self._revlog.emitrevisions(
128 137 nodes,
129 138 nodesorder=nodesorder,
130 139 revisiondata=revisiondata,
131 140 assumehaveparentrevisions=assumehaveparentrevisions,
132 141 deltamode=deltamode,
133 142 sidedata_helpers=sidedata_helpers,
134 143 debug_info=debug_info,
135 144 )
136 145
137 146 def addrevision(
138 147 self,
139 148 revisiondata,
140 149 transaction,
141 150 linkrev,
142 151 p1,
143 152 p2,
144 153 node=None,
145 154 flags=revlog.REVIDX_DEFAULT_FLAGS,
146 155 cachedelta=None,
147 156 ):
148 157 return self._revlog.addrevision(
149 158 revisiondata,
150 159 transaction,
151 160 linkrev,
152 161 p1,
153 162 p2,
154 163 node=node,
155 164 flags=flags,
156 165 cachedelta=cachedelta,
157 166 )
158 167
159 168 def addgroup(
160 169 self,
161 170 deltas,
162 171 linkmapper,
163 172 transaction,
164 173 addrevisioncb=None,
165 174 duplicaterevisioncb=None,
166 175 maybemissingparents=False,
167 176 debug_info=None,
168 177 delta_base_reuse_policy=None,
169 178 ):
170 179 if maybemissingparents:
171 180 raise error.Abort(
172 181 _(
173 182 b'revlog storage does not support missing '
174 183 b'parents write mode'
175 184 )
176 185 )
177 186
178 187 with self._revlog._writing(transaction):
179 188 if self._fix_issue6528:
180 189 deltas = rewrite.filter_delta_issue6528(self._revlog, deltas)
181 190
182 191 return self._revlog.addgroup(
183 192 deltas,
184 193 linkmapper,
185 194 transaction,
186 195 addrevisioncb=addrevisioncb,
187 196 duplicaterevisioncb=duplicaterevisioncb,
188 197 debug_info=debug_info,
189 198 delta_base_reuse_policy=delta_base_reuse_policy,
190 199 )
191 200
192 201 def getstrippoint(self, minlink):
193 202 return self._revlog.getstrippoint(minlink)
194 203
195 204 def strip(self, minlink, transaction):
196 205 return self._revlog.strip(minlink, transaction)
197 206
198 207 def censorrevision(self, tr, node, tombstone=b''):
199 208 return self._revlog.censorrevision(tr, node, tombstone=tombstone)
200 209
201 210 def files(self):
202 211 return self._revlog.files()
203 212
204 213 def read(self, node):
205 214 return storageutil.filtermetadata(self.revision(node))
206 215
207 216 def add(self, text, meta, transaction, link, p1=None, p2=None):
208 217 if meta or text.startswith(b'\1\n'):
209 218 text = storageutil.packmeta(meta, text)
210 219 rev = self.addrevision(text, transaction, link, p1, p2)
211 220 return self.node(rev)
212 221
213 222 def renamed(self, node):
214 223 return storageutil.filerevisioncopied(self, node)
215 224
216 225 def size(self, rev):
217 226 """return the size of a given revision"""
218 227
219 228 # for revisions with renames, we have to go the slow way
220 229 node = self.node(rev)
221 230 if self.iscensored(rev):
222 231 return 0
223 232 if self.renamed(node):
224 233 return len(self.read(node))
225 234
226 235 # XXX if self.read(node).startswith("\1\n"), this returns (size+4)
227 236 # XXX See also basefilectx.cmp.
228 237 return self._revlog.size(rev)
229 238
230 239 def cmp(self, node, text):
231 240 """compare text with a given file revision
232 241
233 242 returns True if text is different than what is stored.
234 243 """
235 244 return not storageutil.filedataequivalent(self, node, text)
236 245
237 def verifyintegrity(self, state):
246 def verifyintegrity(self, state) -> Iterable[revlog.RevLogProblem]:
238 247 return self._revlog.verifyintegrity(state)
239 248
240 249 def storageinfo(
241 250 self,
242 251 exclusivefiles=False,
243 252 sharedfiles=False,
244 253 revisionscount=False,
245 254 trackedsize=False,
246 255 storedsize=False,
247 256 ):
248 257 return self._revlog.storageinfo(
249 258 exclusivefiles=exclusivefiles,
250 259 sharedfiles=sharedfiles,
251 260 revisionscount=revisionscount,
252 261 trackedsize=trackedsize,
253 262 storedsize=storedsize,
254 263 )
255 264
256 265 # Used by repo upgrade.
257 266 def clone(self, tr, destrevlog, **kwargs):
258 267 if not isinstance(destrevlog, filelog):
259 268 msg = b'expected filelog to clone(), not %r'
260 269 msg %= destrevlog
261 270 raise error.ProgrammingError(msg)
262 271
263 272 return self._revlog.clone(tr, destrevlog._revlog, **kwargs)
264 273
265 274
266 275 filelog = interfaceutil.implementer(repository.ifilestorage)(FileLog)
267 276
268 277 if typing.TYPE_CHECKING:
269 278 filelog = FileLog
270 279
271 280
272 281 class narrowfilelog(filelog):
273 282 """Filelog variation to be used with narrow stores."""
274 283
275 284 def __init__(self, opener, path, narrowmatch, try_split=False):
276 285 super(narrowfilelog, self).__init__(opener, path, try_split=try_split)
277 286 self._narrowmatch = narrowmatch
278 287
279 288 def renamed(self, node):
280 289 res = super(narrowfilelog, self).renamed(node)
281 290
282 291 # Renames that come from outside the narrowspec are problematic
283 292 # because we may lack the base text for the rename. This can result
284 293 # in code attempting to walk the ancestry or compute a diff
285 294 # encountering a missing revision. We address this by silently
286 295 # removing rename metadata if the source file is outside the
287 296 # narrow spec.
288 297 #
289 298 # A better solution would be to see if the base revision is available,
290 299 # rather than assuming it isn't.
291 300 #
292 301 # An even better solution would be to teach all consumers of rename
293 302 # metadata that the base revision may not be available.
294 303 #
295 304 # TODO consider better ways of doing this.
296 305 if res and not self._narrowmatch(res[0]):
297 306 return None
298 307
299 308 return res
300 309
301 310 def size(self, rev):
302 311 # Because we have a custom renamed() that may lie, we need to call
303 312 # the base renamed() to report accurate results.
304 313 node = self.node(rev)
305 314 if super(narrowfilelog, self).renamed(node):
306 315 return len(self.read(node))
307 316 else:
308 317 return super(narrowfilelog, self).size(rev)
309 318
310 319 def cmp(self, node, text):
311 320         # We don't call `super` because narrow parents can be buggy in case of an
312 321 # ambiguous dirstate. Always take the slow path until there is a better
313 322 # fix, see issue6150.
314 323
315 324 # Censored files compare against the empty file.
316 325 if self.iscensored(self.rev(node)):
317 326 return text != b''
318 327
319 328 return self.read(node) != text
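
filelog's verifyintegrity() is now declared to return Iterable[revlog.RevLogProblem]. A hypothetical consumer of that typed result, assuming RevLogProblem exposes the error and warning fields of the classic revlogproblem class (summarize_problems is illustrative only):

from __future__ import annotations

from typing import Iterable, Tuple

from mercurial import revlog


def summarize_problems(problems: Iterable[revlog.RevLogProblem]) -> Tuple[int, int]:
    # Split the reported problems into hard errors and warnings; pytype can
    # now check this against the annotated verifyintegrity() return type.
    errors = warnings = 0
    for p in problems:
        if p.error is not None:
            errors += 1
        elif p.warning is not None:
            warnings += 1
    return errors, warnings
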
@@ -1,2780 +1,2797 b''
1 1 # manifest.py - manifest revision class for mercurial
2 2 #
3 3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8
9 9 import heapq
10 10 import itertools
11 11 import struct
12 12 import typing
13 13 import weakref
14 14
15 15 from typing import (
16 16 ByteString,
17 17 Callable,
18 18 Collection,
19 19 Dict,
20 20 Iterable,
21 21 Iterator,
22 22 List,
23 23 Optional,
24 24 Set,
25 25 Tuple,
26 26 Union,
27 27 cast,
28 28 )
29 29
30 30 from .i18n import _
31 31 from .node import (
32 32 bin,
33 33 hex,
34 34 nullrev,
35 35 )
36 36 from . import (
37 37 encoding,
38 38 error,
39 39 match as matchmod,
40 40 mdiff,
41 41 pathutil,
42 42 policy,
43 43 pycompat,
44 44 revlog,
45 45 util,
46 46 )
47 47 from .interfaces import (
48 48 repository,
49 49 util as interfaceutil,
50 50 )
51 51 from .revlogutils import (
52 52 constants as revlog_constants,
53 53 )
54 54
55 55 parsers = policy.importmod('parsers')
56 56 propertycache = util.propertycache
57 57
58 58 # Allow tests to more easily test the alternate path in manifestdict.fastdelta()
59 59 FASTDELTA_TEXTDIFF_THRESHOLD = 1000
60 60
61 61
62 62 def _parse(nodelen, data: bytes):
63 63 # This method does a little bit of excessive-looking
64 64 # precondition checking. This is so that the behavior of this
65 65 # class exactly matches its C counterpart to try and help
66 66 # prevent surprise breakage for anyone that develops against
67 67 # the pure version.
68 68 if data and data[-1:] != b'\n':
69 69 raise ValueError(b'Manifest did not end in a newline.')
70 70 prev = None
71 71 for l in data.splitlines():
72 72 if prev is not None and prev > l:
73 73 raise ValueError(b'Manifest lines not in sorted order.')
74 74 prev = l
75 75 f, n = l.split(b'\0')
76 76 nl = len(n)
77 77 flags = n[-1:]
78 78 if flags in _manifestflags:
79 79 n = n[:-1]
80 80 nl -= 1
81 81 else:
82 82 flags = b''
83 83 if nl != 2 * nodelen:
84 84 raise ValueError(b'Invalid manifest line')
85 85
86 86 yield f, bin(n), flags
87 87
88 88
89 89 def _text(it):
90 90 files = []
91 91 lines = []
92 92 for f, n, fl in it:
93 93 files.append(f)
94 94 # if this is changed to support newlines in filenames,
95 95 # be sure to check the templates/ dir again (especially *-raw.tmpl)
96 96 lines.append(b"%s\0%s%s\n" % (f, hex(n), fl))
97 97
98 98 _checkforbidden(files)
99 99 return b''.join(lines)
100 100
101 101
102 102 class lazymanifestiter:
103 103 def __init__(self, lm: '_LazyManifest') -> None:
104 104 self.pos = 0
105 105 self.lm = lm
106 106
107 107 def __iter__(self) -> 'lazymanifestiter':
108 108 return self
109 109
110 110 def next(self) -> bytes:
111 111 try:
112 112 data, pos = self.lm._get(self.pos)
113 113 except IndexError:
114 114 raise StopIteration
115 115 if pos == -1:
116 116 assert isinstance(data, tuple)
117 117 self.pos += 1
118 118 return data[0]
119 119 assert isinstance(data, bytes)
120 120 self.pos += 1
121 121 zeropos = data.find(b'\x00', pos)
122 122 return data[pos:zeropos]
123 123
124 124 __next__ = next
125 125
126 126
127 127 class lazymanifestiterentries:
128 128 def __init__(self, lm: '_LazyManifest') -> None:
129 129 self.lm = lm
130 130 self.pos = 0
131 131
132 132 def __iter__(self) -> 'lazymanifestiterentries':
133 133 return self
134 134
135 135 def next(self) -> Tuple[bytes, bytes, bytes]:
136 136 try:
137 137 data, pos = self.lm._get(self.pos)
138 138 except IndexError:
139 139 raise StopIteration
140 140 if pos == -1:
141 141 assert isinstance(data, tuple)
142 142 self.pos += 1
143 143 return data
144 144 assert isinstance(data, bytes)
145 145 zeropos = data.find(b'\x00', pos)
146 146 nlpos = data.find(b'\n', pos)
147 147 if zeropos == -1 or nlpos == -1 or nlpos < zeropos:
148 148 raise error.StorageError(b'Invalid manifest line')
149 149 flags = data[nlpos - 1 : nlpos]
150 150 if flags in _manifestflags:
151 151 hlen = nlpos - zeropos - 2
152 152 else:
153 153 hlen = nlpos - zeropos - 1
154 154 flags = b''
155 155 if hlen != 2 * self.lm._nodelen:
156 156 raise error.StorageError(b'Invalid manifest line')
157 157 hashval = unhexlify(
158 158 data, self.lm.extrainfo[self.pos], zeropos + 1, hlen
159 159 )
160 160 self.pos += 1
161 161 return (data[pos:zeropos], hashval, flags)
162 162
163 163 __next__ = next
164 164
165 165
166 166 def unhexlify(data: bytes, extra: int, pos, length: int):
167 167 s = bin(data[pos : pos + length])
168 168 if extra:
169 169 s += bytes([extra & 0xFF])
170 170 return s
171 171
172 172
173 173 def _cmp(a, b):
174 174 return (a > b) - (a < b)
175 175
176 176
177 177 _manifestflags = {b'', b'l', b't', b'x'}
178 178
179 179
180 180 class _LazyManifest:
181 181     """A pure python manifest backed by a byte string. It is supplemented with
182 182 internal lists as it is modified, until it is compacted back to a pure byte
183 183 string.
184 184
185 185 ``data`` is the initial manifest data.
186 186
187 187 ``positions`` is a list of offsets, one per manifest entry. Positive
188 188 values are offsets into ``data``, negative values are offsets into the
189 189 ``extradata`` list. When an entry is removed, its entry is dropped from
190 190 ``positions``. The values are encoded such that when walking the list and
191 191 indexing into ``data`` or ``extradata`` as appropriate, the entries are
192 192 sorted by filename.
193 193
194 194 ``extradata`` is a list of (key, hash, flags) for entries that were added or
195 195 modified since the manifest was created or compacted.
196 196 """
197 197
198 198 def __init__(
199 199 self,
200 200 nodelen: int,
201 201 data: bytes,
202 202 positions=None,
203 203 extrainfo=None,
204 204 extradata=None,
205 205 hasremovals: bool = False,
206 206 ):
207 207 self._nodelen = nodelen
208 208 if positions is None:
209 209 self.positions = self.findlines(data)
210 210 self.extrainfo = [0] * len(self.positions)
211 211 self.data = data
212 212 self.extradata = []
213 213 self.hasremovals = False
214 214 else:
215 215 self.positions = positions[:]
216 216 self.extrainfo = extrainfo[:]
217 217 self.extradata = extradata[:]
218 218 self.data = data
219 219 self.hasremovals = hasremovals
220 220
221 221 def findlines(self, data: bytes) -> List[int]:
222 222 if not data:
223 223 return []
224 224 pos = data.find(b"\n")
225 225 if pos == -1 or data[-1:] != b'\n':
226 226 raise ValueError(b"Manifest did not end in a newline.")
227 227 positions = [0]
228 228 prev = data[: data.find(b'\x00')]
229 229 while pos < len(data) - 1 and pos != -1:
230 230 positions.append(pos + 1)
231 231 nexts = data[pos + 1 : data.find(b'\x00', pos + 1)]
232 232 if nexts < prev:
233 233 raise ValueError(b"Manifest lines not in sorted order.")
234 234 prev = nexts
235 235 pos = data.find(b"\n", pos + 1)
236 236 return positions
237 237
238 238 def _get(
239 239 self, index: int
240 240 ) -> Tuple[Union[bytes, Tuple[bytes, bytes, bytes]], int]:
241 241 # get the position encoded in pos:
242 242 # positive number is an index in 'data'
243 243 # negative number is in extrapieces
244 244 pos = self.positions[index]
245 245 if pos >= 0:
246 246 return self.data, pos
247 247 return self.extradata[-pos - 1], -1
248 248
249 249 def _getkey(self, pos) -> bytes:
250 250 if pos >= 0:
251 251 return self.data[pos : self.data.find(b'\x00', pos + 1)]
252 252 return self.extradata[-pos - 1][0]
253 253
254 254 def bsearch(self, key: bytes) -> int:
255 255 first = 0
256 256 last = len(self.positions) - 1
257 257
258 258 while first <= last:
259 259 midpoint = (first + last) // 2
260 260 nextpos = self.positions[midpoint]
261 261 candidate = self._getkey(nextpos)
262 262 r = _cmp(key, candidate)
263 263 if r == 0:
264 264 return midpoint
265 265 else:
266 266 if r < 0:
267 267 last = midpoint - 1
268 268 else:
269 269 first = midpoint + 1
270 270 return -1
271 271
272 272 def bsearch2(self, key: bytes) -> Tuple[int, bool]:
273 273 # same as the above, but will always return the position
274 274 # done for performance reasons
275 275 first = 0
276 276 last = len(self.positions) - 1
277 277
278 278 while first <= last:
279 279 midpoint = (first + last) // 2
280 280 nextpos = self.positions[midpoint]
281 281 candidate = self._getkey(nextpos)
282 282 r = _cmp(key, candidate)
283 283 if r == 0:
284 284 return (midpoint, True)
285 285 else:
286 286 if r < 0:
287 287 last = midpoint - 1
288 288 else:
289 289 first = midpoint + 1
290 290 return (first, False)
291 291
292 292 def __contains__(self, key: bytes) -> bool:
293 293 return self.bsearch(key) != -1
294 294
295 295 def __getitem__(self, key: bytes) -> Tuple[bytes, bytes]:
296 296 if not isinstance(key, bytes):
297 297 raise TypeError(b"getitem: manifest keys must be a bytes.")
298 298 needle = self.bsearch(key)
299 299 if needle == -1:
300 300 raise KeyError
301 301 data, pos = self._get(needle)
302 302 if pos == -1:
303 303 assert isinstance(data, tuple)
304 304 return (data[1], data[2])
305 305
306 306 assert isinstance(data, bytes)
307 307 zeropos = data.find(b'\x00', pos)
308 308 nlpos = data.find(b'\n', zeropos)
309 309 assert 0 <= needle <= len(self.positions)
310 310 assert len(self.extrainfo) == len(self.positions)
311 311 if zeropos == -1 or nlpos == -1 or nlpos < zeropos:
312 312 raise error.StorageError(b'Invalid manifest line')
313 313 hlen = nlpos - zeropos - 1
314 314 flags = data[nlpos - 1 : nlpos]
315 315 if flags in _manifestflags:
316 316 hlen -= 1
317 317 else:
318 318 flags = b''
319 319 if hlen != 2 * self._nodelen:
320 320 raise error.StorageError(b'Invalid manifest line')
321 321 hashval = unhexlify(data, self.extrainfo[needle], zeropos + 1, hlen)
322 322 return (hashval, flags)
323 323
324 324 def __delitem__(self, key: bytes) -> None:
325 325 needle, found = self.bsearch2(key)
326 326 if not found:
327 327 raise KeyError
328 328 cur = self.positions[needle]
329 329 self.positions = self.positions[:needle] + self.positions[needle + 1 :]
330 330 self.extrainfo = self.extrainfo[:needle] + self.extrainfo[needle + 1 :]
331 331 if cur >= 0:
332 332 # This does NOT unsort the list as far as the search functions are
333 333 # concerned, as they only examine lines mapped by self.positions.
334 334 self.data = self.data[:cur] + b'\x00' + self.data[cur + 1 :]
335 335 self.hasremovals = True
336 336
337 337 def __setitem__(self, key: bytes, value: Tuple[bytes, bytes]):
338 338 if not isinstance(key, bytes):
339 339 raise TypeError(b"setitem: manifest keys must be a byte string.")
340 340 if not isinstance(value, tuple) or len(value) != 2:
341 341 raise TypeError(
342 342 b"Manifest values must be a tuple of (node, flags)."
343 343 )
344 344 hashval = value[0]
345 345 if not isinstance(hashval, bytes) or len(hashval) not in (20, 32):
346 346 raise TypeError(b"node must be a 20-byte or 32-byte byte string")
347 347 flags = value[1]
348 348 if not isinstance(flags, bytes) or len(flags) > 1:
349 349             raise TypeError(b"flags must be a 0 or 1 byte string, got %r", flags)
350 350 needle, found = self.bsearch2(key)
351 351 if found:
352 352 # put the item
353 353 pos = self.positions[needle]
354 354 if pos < 0:
355 355 self.extradata[-pos - 1] = (key, hashval, value[1])
356 356 else:
357 357 # just don't bother
358 358 self.extradata.append((key, hashval, value[1]))
359 359 self.positions[needle] = -len(self.extradata)
360 360 else:
361 361 # not found, put it in with extra positions
362 362 self.extradata.append((key, hashval, value[1]))
363 363 self.positions = (
364 364 self.positions[:needle]
365 365 + [-len(self.extradata)]
366 366 + self.positions[needle:]
367 367 )
368 368 self.extrainfo = (
369 369 self.extrainfo[:needle] + [0] + self.extrainfo[needle:]
370 370 )
371 371
372 372 def copy(self) -> '_LazyManifest':
373 373 # XXX call _compact like in C?
374 374 return _lazymanifest(
375 375 self._nodelen,
376 376 self.data,
377 377 self.positions,
378 378 self.extrainfo,
379 379 self.extradata,
380 380 self.hasremovals,
381 381 )
382 382
383 383 def _compact(self) -> None:
384 384 # hopefully not called TOO often
385 385 if len(self.extradata) == 0 and not self.hasremovals:
386 386 return
387 387 l = []
388 388 i = 0
389 389 offset = 0
390 390 self.extrainfo = [0] * len(self.positions)
391 391 while i < len(self.positions):
392 392 if self.positions[i] >= 0:
393 393 cur = self.positions[i]
394 394 last_cut = cur
395 395
396 396 # Collect all contiguous entries in the buffer at the current
397 397 # offset, breaking out only for added/modified items held in
398 398 # extradata, or a deleted line prior to the next position.
399 399 while True:
400 400 self.positions[i] = offset
401 401 i += 1
402 402 if i == len(self.positions) or self.positions[i] < 0:
403 403 break
404 404
405 405 # A removed file has no positions[] entry, but does have an
406 406 # overwritten first byte. Break out and find the end of the
407 407 # current good entry/entries if there is a removed file
408 408 # before the next position.
409 409 if (
410 410 self.hasremovals
411 411 and self.data.find(b'\n\x00', cur, self.positions[i])
412 412 != -1
413 413 ):
414 414 break
415 415
416 416 offset += self.positions[i] - cur
417 417 cur = self.positions[i]
418 418 end_cut = self.data.find(b'\n', cur)
419 419 if end_cut != -1:
420 420 end_cut += 1
421 421 offset += end_cut - cur
422 422 l.append(self.data[last_cut:end_cut])
423 423 else:
424 424 while i < len(self.positions) and self.positions[i] < 0:
425 425 cur = self.positions[i]
426 426 t = self.extradata[-cur - 1]
427 427 l.append(self._pack(t))
428 428 self.positions[i] = offset
429 429 # Hashes are either 20 bytes (old sha1s) or 32
430 430 # bytes (new non-sha1).
431 431 hlen = 20
432 432 if len(t[1]) > 25:
433 433 hlen = 32
434 434 if len(t[1]) > hlen:
435 435 self.extrainfo[i] = ord(t[1][hlen + 1])
436 436 offset += len(l[-1])
437 437 i += 1
438 438 self.data = b''.join(l)
439 439 self.hasremovals = False
440 440 self.extradata = []
441 441
442 442 def _pack(self, d: Tuple[bytes, bytes, bytes]) -> bytes:
443 443 n = d[1]
444 444 assert len(n) in (20, 32)
445 445 return d[0] + b'\x00' + hex(n) + d[2] + b'\n'
446 446
447 447 def text(self) -> ByteString:
448 448 self._compact()
449 449 return self.data
450 450
451 451 def diff(
452 452 self, m2: '_LazyManifest', clean: bool = False
453 453 ) -> Dict[
454 454 bytes,
455 455 Optional[
456 456 Tuple[Tuple[Optional[bytes], bytes], Tuple[Optional[bytes], bytes]]
457 457 ],
458 458 ]:
459 459 '''Finds changes between the current manifest and m2.'''
460 460 # XXX think whether efficiency matters here
461 461 diff = {}
462 462
463 463 for fn, e1, flags in self.iterentries():
464 464 if fn not in m2:
465 465 diff[fn] = (e1, flags), (None, b'')
466 466 else:
467 467 e2 = m2[fn]
468 468 if (e1, flags) != e2:
469 469 diff[fn] = (e1, flags), e2
470 470 elif clean:
471 471 diff[fn] = None
472 472
473 473 for fn, e2, flags in m2.iterentries():
474 474 if fn not in self:
475 475 diff[fn] = (None, b''), (e2, flags)
476 476
477 477 return diff
478 478
479 479 def iterentries(self) -> lazymanifestiterentries:
480 480 return lazymanifestiterentries(self)
481 481
482 482 def iterkeys(self) -> lazymanifestiter:
483 483 return lazymanifestiter(self)
484 484
485 485 def __iter__(self) -> lazymanifestiter:
486 486 return lazymanifestiter(self)
487 487
488 488 def __len__(self) -> int:
489 489 return len(self.positions)
490 490
491 491 def filtercopy(self, filterfn: Callable[[bytes], bool]) -> '_LazyManifest':
492 492 # XXX should be optimized
493 493 c = _lazymanifest(self._nodelen, b'')
494 494 for f, n, fl in self.iterentries():
495 495 if filterfn(f):
496 496 c[f] = n, fl
497 497 return c
498 498
499 499
500 500 try:
501 501 _lazymanifest = parsers.lazymanifest
502 502 except AttributeError:
503 503 _lazymanifest = _LazyManifest
504 504
505 505
506 506 class ManifestDict:
507 507 def __init__(self, nodelen: int, data: ByteString = b''):
508 508 self._nodelen = nodelen
509 509 self._lm = _lazymanifest(nodelen, data)
510 510
511 511 def __getitem__(self, key: bytes) -> bytes:
512 512 return self._lm[key][0]
513 513
514 514 def find(self, key: bytes) -> Tuple[bytes, bytes]:
515 515 return self._lm[key]
516 516
517 517 def __len__(self) -> int:
518 518 return len(self._lm)
519 519
520 520 def __nonzero__(self) -> bool:
521 521 # nonzero is covered by the __len__ function, but implementing it here
522 522 # makes it easier for extensions to override.
523 523 return len(self._lm) != 0
524 524
525 525 __bool__ = __nonzero__
526 526
527 527 def set(self, key: bytes, node: bytes, flags: bytes) -> None:
528 528 self._lm[key] = node, flags
529 529
530 530 def __setitem__(self, key: bytes, node: bytes) -> None:
531 531 self._lm[key] = node, self.flags(key)
532 532
533 533 def __contains__(self, key: bytes) -> bool:
534 534 if key is None:
535 535 return False
536 536 return key in self._lm
537 537
538 538 def __delitem__(self, key: bytes) -> None:
539 539 del self._lm[key]
540 540
541 541 def __iter__(self) -> Iterator[bytes]:
542 542 return self._lm.__iter__()
543 543
544 544 def iterkeys(self) -> Iterator[bytes]:
545 545 return self._lm.iterkeys()
546 546
547 547 def keys(self) -> List[bytes]:
548 548 return list(self.iterkeys())
549 549
550 550 def filesnotin(self, m2, match=None) -> Set[bytes]:
551 551 '''Set of files in this manifest that are not in the other'''
552 552 if match is not None:
553 553 match = matchmod.badmatch(match, lambda path, msg: None)
554 554 sm2 = set(m2.walk(match))
555 555 return {f for f in self.walk(match) if f not in sm2}
556 556 return {f for f in self if f not in m2}
557 557
558 558 @propertycache
559 559 def _dirs(self) -> pathutil.dirs:
560 560 return pathutil.dirs(self)
561 561
562 562 def dirs(self) -> pathutil.dirs:
563 563 return self._dirs
564 564
565 565 def hasdir(self, dir: bytes) -> bool:
566 566 return dir in self._dirs
567 567
568 568 def _filesfastpath(self, match: matchmod.basematcher) -> bool:
569 569 """Checks whether we can correctly and quickly iterate over matcher
570 570 files instead of over manifest files."""
571 571 files = match.files()
572 572 return len(files) < 100 and (
573 573 match.isexact()
574 574 or (match.prefix() and all(fn in self for fn in files))
575 575 )
576 576
577 577 def walk(self, match: matchmod.basematcher) -> Iterator[bytes]:
578 578 """Generates matching file names.
579 579
580 580 Equivalent to manifest.matches(match).iterkeys(), but without creating
581 581 an entirely new manifest.
582 582
583 583 It also reports nonexistent files by marking them bad with match.bad().
584 584 """
585 585 if match.always():
586 586 for f in iter(self):
587 587 yield f
588 588 return
589 589
590 590 fset = set(match.files())
591 591
592 592 # avoid the entire walk if we're only looking for specific files
593 593 if self._filesfastpath(match):
594 594 for fn in sorted(fset):
595 595 if fn in self:
596 596 yield fn
597 597 return
598 598
599 599 for fn in self:
600 600 if fn in fset:
601 601 # specified pattern is the exact name
602 602 fset.remove(fn)
603 603 if match(fn):
604 604 yield fn
605 605
606 606 # for dirstate.walk, files=[''] means "walk the whole tree".
607 607 # follow that here, too
608 608 fset.discard(b'')
609 609
610 610 for fn in sorted(fset):
611 611 if not self.hasdir(fn):
612 612 match.bad(fn, None)
613 613
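# A minimal illustrative sketch (not part of the original file) of walk() with
# an exact matcher; the manifest and matcher below are assumptions:
#
#   m = manifestdict(20)
#   m[b'a.txt'] = b'\x11' * 20
#   m[b'b.txt'] = b'\x22' * 20
#   match = matchmod.exact([b'a.txt', b'missing.txt'])
#   list(m.walk(match))   # -> [b'a.txt'], served by the _filesfastpath branch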
614 614 def _matches(self, match: matchmod.basematcher) -> 'ManifestDict':
615 615 '''generate a new manifest filtered by the match argument'''
616 616 if match.always():
617 617 return self.copy()
618 618
619 619 if self._filesfastpath(match):
620 620 m = manifestdict(self._nodelen)
621 621 lm = self._lm
622 622 for fn in match.files():
623 623 if fn in lm:
624 624 m._lm[fn] = lm[fn]
625 625 return m
626 626
627 627 m = manifestdict(self._nodelen)
628 628 m._lm = self._lm.filtercopy(match)
629 629 return m
630 630
631 631 def diff(
632 632 self,
633 633 m2: 'ManifestDict',
634 634 match: Optional[matchmod.basematcher] = None,
635 635 clean: bool = False,
636 636 ) -> Dict[
637 637 bytes,
638 638 Optional[
639 639 Tuple[Tuple[Optional[bytes], bytes], Tuple[Optional[bytes], bytes]]
640 640 ],
641 641 ]:
642 642 """Finds changes between the current manifest and m2.
643 643
644 644 Args:
645 645 m2: the manifest to which this manifest should be compared.
646 646 clean: if true, include files unchanged between these manifests
647 647 with a None value in the returned dictionary.
648 648
649 649 The result is returned as a dict with filename as key and
650 650 values of the form ((n1,fl1),(n2,fl2)), where n1/n2 is the
651 651 nodeid in the current/other manifest and fl1/fl2 is the flag
652 652 in the current/other manifest. Where the file does not exist,
653 653 the nodeid will be None and the flags will be the empty
654 654 string.
655 655 """
656 656 if match:
657 657 m1 = self._matches(match)
658 658 m2 = m2._matches(match)
659 659 return m1.diff(m2, clean=clean)
660 660 return self._lm.diff(m2._lm, clean)
661 661
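# A minimal illustrative sketch (not part of the original file) of the diff()
# result shape; the two manifests below are assumptions:
#
#   m1 = manifestdict(20); m1[b'a'] = b'\x01' * 20
#   m2 = manifestdict(20); m2[b'a'] = b'\x02' * 20
#   m1.diff(m2)
#   # -> {b'a': ((b'\x01' * 20, b''), (b'\x02' * 20, b''))}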
662 662 def setflag(self, key: bytes, flag: bytes) -> None:
663 663 if flag not in _manifestflags:
664 664 raise TypeError(b"Invalid manifest flag set.")
665 665 self._lm[key] = self[key], flag
666 666
667 667 def get(self, key: bytes, default=None) -> Optional[bytes]:
668 668 try:
669 669 return self._lm[key][0]
670 670 except KeyError:
671 671 return default
672 672
673 673 def flags(self, key: bytes) -> bytes:
674 674 try:
675 675 return self._lm[key][1]
676 676 except KeyError:
677 677 return b''
678 678
679 679 def copy(self) -> 'ManifestDict':
680 680 c = manifestdict(self._nodelen)
681 681 c._lm = self._lm.copy()
682 682 return c
683 683
684 684 def items(self) -> Iterator[Tuple[bytes, bytes]]:
685 685 return (x[:2] for x in self._lm.iterentries())
686 686
687 687 def iteritems(self) -> Iterator[Tuple[bytes, bytes]]:
688 688 return (x[:2] for x in self._lm.iterentries())
689 689
690 690 def iterentries(self) -> Iterator[Tuple[bytes, bytes, bytes]]:
691 691 return self._lm.iterentries()
692 692
693 693 def text(self) -> ByteString:
694 694 # most likely uses native version
695 695 return self._lm.text()
696 696
697 697 def fastdelta(
698 698 self, base: ByteString, changes: Iterable[Tuple[bytes, bool]]
699 699 ) -> Tuple[ByteString, ByteString]:
700 700 """Given a base manifest text as a bytearray and a list of changes
701 701 relative to that text, compute a delta that can be used by revlog.
702 702 """
703 703 delta = []
704 704 dstart = None
705 705 dend = None
706 706 dline = [b""]
707 707 start = 0
708 708 # zero copy representation of base as a buffer
709 709 addbuf = util.buffer(base)
710 710
711 711 changes = list(changes)
712 712 if len(changes) < FASTDELTA_TEXTDIFF_THRESHOLD:
713 713 # start with a readonly loop that finds the offset of
714 714 # each line and creates the deltas
715 715 for f, todelete in changes:
716 716 # (start, end) will either delimit the existing line or give the insert point
717 717 start, end = _msearch(addbuf, f, start)
718 718 if not todelete:
719 719 h, fl = self._lm[f]
720 720 l = b"%s\0%s%s\n" % (f, hex(h), fl)
721 721 else:
722 722 if start == end:
723 723 # item we want to delete was not found, error out
724 724 raise AssertionError(
725 725 _(b"failed to remove %s from manifest") % f
726 726 )
727 727 l = b""
728 728 if dstart is not None and dstart <= start and dend >= start:
729 729 if dend < end:
730 730 dend = end
731 731 if l:
732 732 dline.append(l)
733 733 else:
734 734 if dstart is not None:
735 735 delta.append((dstart, dend, b"".join(dline)))
736 736 dstart = start
737 737 dend = end
738 738 dline = [l]
739 739
740 740 if dstart is not None:
741 741 delta.append((dstart, dend, b"".join(dline)))
742 742 # apply the delta to the base, and get a delta for addrevision
743 743 deltatext, arraytext = _addlistdelta(base, delta)
744 744 else:
745 745 # For large changes, it's much cheaper to just build the text and
746 746 # diff it.
747 747 arraytext = bytearray(self.text())
748 748 deltatext = mdiff.textdiff(
749 749 util.buffer(base), util.buffer(arraytext)
750 750 )
751 751
752 752 return arraytext, deltatext
753 753
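# A minimal illustrative sketch (not part of the original file) of fastdelta():
# small change lists are patched line by line against the base text, anything
# above FASTDELTA_TEXTDIFF_THRESHOLD falls back to a full text diff.  `m` and
# `old_manifest_text` below are assumptions:
#
#   base = bytearray(old_manifest_text)
#   changes = [(b'a.txt', False), (b'gone.txt', True)]   # (path, todelete)
#   arraytext, deltatext = m.fastdelta(base, changes)
#   # arraytext is the new full text, deltatext is a delta usable by revlog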
754 754
755 755 manifestdict = interfaceutil.implementer(repository.imanifestdict)(ManifestDict)
756 756
757 757 if typing.TYPE_CHECKING:
758 758 manifestdict = ManifestDict
759 759
760 760
761 761 def _msearch(
762 762 m: ByteString, s: bytes, lo: int = 0, hi: Optional[int] = None
763 763 ) -> Tuple[int, int]:
764 764 """return a tuple (start, end) that says where to find s within m.
765 765
766 766 If the string is found m[start:end] are the line containing
767 767 that string. If start == end the string was not found and
768 768 they indicate the proper sorted insertion point.
769 769 """
770 770
771 771 def advance(i: int, c: bytes):
772 772 while i < lenm and m[i : i + 1] != c:
773 773 i += 1
774 774 return i
775 775
776 776 if not s:
777 777 return (lo, lo)
778 778 lenm = len(m)
779 779 if not hi:
780 780 hi = lenm
781 781 while lo < hi:
782 782 mid = (lo + hi) // 2
783 783 start = mid
784 784 while start > 0 and m[start - 1 : start] != b'\n':
785 785 start -= 1
786 786 end = advance(start, b'\0')
787 787 if bytes(m[start:end]) < s:
788 788 # we know that after the null there are 40 bytes of sha1
789 789 # this translates to the bisect lo = mid + 1
790 790 lo = advance(end + 40, b'\n') + 1
791 791 else:
792 792 # this translates to the bisect hi = mid
793 793 hi = start
794 794 end = advance(lo, b'\0')
795 795 found = m[lo:end]
796 796 if s == found:
797 797 # we know that after the null there are 40 bytes of sha1
798 798 end = advance(end + 40, b'\n')
799 799 return (lo, end + 1)
800 800 else:
801 801 return (lo, lo)
802 802
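# A minimal illustrative sketch (not part of the original file) of _msearch()
# over raw manifest text, where each line is "<path>\0<40-hex-node><flags>\n":
#
#   text = b"bar\x00" + b"a" * 40 + b"\n" + b"foo\x00" + b"b" * 40 + b"\n"
#   _msearch(text, b"foo")   # -> (45, 90), the byte range of the b"foo" line
#   _msearch(text, b"baz")   # -> (45, 45), start == end: sorted insertion point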
803 803
804 804 def _checkforbidden(l: Iterable[bytes]) -> None:
805 805 """Check filenames for illegal characters."""
806 806 for f in l:
807 807 if b'\n' in f or b'\r' in f:
808 808 raise error.StorageError(
809 809 _(b"'\\n' and '\\r' disallowed in filenames: %r")
810 810 % pycompat.bytestr(f)
811 811 )
812 812
813 813
814 814 # apply the changes collected during the bisect loop to our addlist
815 815 # return a delta suitable for addrevision
816 816 def _addlistdelta(
817 817 addlist: ByteString,
818 818 x: Iterable[Tuple[int, int, bytes]],
819 819 ) -> Tuple[bytes, ByteString]:
820 820 # for large addlist arrays, building a new array is cheaper
821 821 # than repeatedly modifying the existing one
822 822 currentposition = 0
823 823 newaddlist = bytearray()
824 824
825 825 for start, end, content in x:
826 826 newaddlist += addlist[currentposition:start]
827 827 if content:
828 828 newaddlist += bytearray(content)
829 829
830 830 currentposition = end
831 831
832 832 newaddlist += addlist[currentposition:]
833 833
834 834 deltatext = b"".join(
835 835 struct.pack(b">lll", start, end, len(content)) + content
836 836 for start, end, content in x
837 837 )
838 838 return deltatext, newaddlist
839 839
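# A minimal illustrative sketch (not part of the original file) of the delta
# format produced by _addlistdelta(): each chunk is ">lll" (start, end, length)
# followed by the replacement bytes:
#
#   addlist = bytearray(b"bar\x00" + b"a" * 40 + b"\n" + b"foo\x00" + b"b" * 40 + b"\n")
#   repl = b"foo\x00" + b"c" * 40 + b"\n"
#   deltatext, newaddlist = _addlistdelta(addlist, [(45, 90, repl)])
#   # deltatext == struct.pack(b">lll", 45, 90, 45) + repl
#   # newaddlist is addlist with bytes 45..90 replaced by repl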
840 840
841 841 def _splittopdir(f: bytes) -> Tuple[bytes, bytes]:
842 842 if b'/' in f:
843 843 dir, subpath = f.split(b'/', 1)
844 844 return dir + b'/', subpath
845 845 else:
846 846 return b'', f
847 847
848 848
849 849 _noop = lambda s: None
850 850
851 851
852 852 class TreeManifest:
853 _dir: bytes
854 _dirs: Dict[bytes, 'TreeManifest']
855 _dirty: bool
856 _files: Dict[bytes, bytes]
857 _flags: Dict[bytes, bytes]
858
853 859 def __init__(self, nodeconstants, dir: bytes = b'', text: bytes = b''):
854 860 self._dir = dir
855 861 self.nodeconstants = nodeconstants
856 862 self._node = self.nodeconstants.nullid
857 863 self._nodelen = self.nodeconstants.nodelen
858 864 self._loadfunc = _noop
859 865 self._copyfunc = _noop
860 866 self._dirty = False
861 self._dirs: Dict[bytes, 'TreeManifest'] = {}
867 self._dirs = {}
862 868 self._lazydirs: Dict[
863 869 bytes,
864 870 Tuple[bytes, Callable[[bytes, bytes], 'TreeManifest'], bool],
865 871 ] = {}
866 872 # Using _lazymanifest here is a little slower than plain old dicts
867 self._files: Dict[bytes, bytes] = {}
873 self._files = {}
868 874 self._flags = {}
869 875 if text:
870 876
871 877 def readsubtree(subdir, subm):
872 878 raise AssertionError(
873 879 b'treemanifest constructor only accepts flat manifests'
874 880 )
875 881
876 882 self.parse(text, readsubtree)
877 883 self._dirty = True # Mark flat manifest dirty after parsing
878 884
879 885 def _subpath(self, path: bytes) -> bytes:
880 886 return self._dir + path
881 887
882 888 def _loadalllazy(self) -> None:
883 889 selfdirs = self._dirs
884 890 subpath = self._subpath
885 891 for d, (node, readsubtree, docopy) in self._lazydirs.items():
886 892 if docopy:
887 893 selfdirs[d] = readsubtree(subpath(d), node).copy()
888 894 else:
889 895 selfdirs[d] = readsubtree(subpath(d), node)
890 896 self._lazydirs.clear()
891 897
892 898 def _loadlazy(self, d: bytes) -> None:
893 899 v = self._lazydirs.get(d)
894 900 if v is not None:
895 901 node, readsubtree, docopy = v
896 902 if docopy:
897 903 self._dirs[d] = readsubtree(self._subpath(d), node).copy()
898 904 else:
899 905 self._dirs[d] = readsubtree(self._subpath(d), node)
900 906 del self._lazydirs[d]
901 907
902 908 def _loadchildrensetlazy(
903 909 self, visit: Union[Set[bytes], bytes]
904 910 ) -> Optional[Set[bytes]]:
905 911 if not visit:
906 912 return None
907 913 if visit == b'all' or visit == b'this':
908 914 self._loadalllazy()
909 915 return None
910 916
911 917 visit = cast(Set[bytes], visit)
912 918
913 919 loadlazy = self._loadlazy
914 920 for k in visit:
915 921 loadlazy(k + b'/')
916 922 return visit
917 923
918 924 def _loaddifflazy(self, t1: 'TreeManifest', t2: 'TreeManifest'):
919 925 """load items in t1 and t2 if they're needed for diffing.
920 926
921 927 The criteria currently is:
922 928 - if it's not present in _lazydirs in either t1 or t2, load it in the
923 929 other (it may already be loaded or it may not exist, doesn't matter)
924 930 - if it's present in _lazydirs in both, compare the nodeid; if it
925 931 differs, load it in both
926 932 """
927 933 toloadlazy = []
928 934 for d, v1 in t1._lazydirs.items():
929 935 v2 = t2._lazydirs.get(d)
930 936 if v2 is None or v2[0] != v1[0]:
931 937 toloadlazy.append(d)
932 938 for d, v1 in t2._lazydirs.items():
933 939 if d not in t1._lazydirs:
934 940 toloadlazy.append(d)
935 941
936 942 for d in toloadlazy:
937 943 t1._loadlazy(d)
938 944 t2._loadlazy(d)
939 945
940 946 def __len__(self) -> int:
941 947 self._load()
942 948 size = len(self._files)
943 949 self._loadalllazy()
944 950 for m in self._dirs.values():
945 951 size += m.__len__()
946 952 return size
947 953
948 954 def __nonzero__(self) -> bool:
949 955 # Faster than "__len__() != 0" since it avoids loading sub-manifests
950 956 return not self._isempty()
951 957
952 958 __bool__ = __nonzero__
953 959
954 960 def _isempty(self) -> bool:
955 961 self._load() # for consistency; already loaded by all callers
956 962 # See if we can skip loading everything.
957 963 if self._files or (
958 964 self._dirs and any(not m._isempty() for m in self._dirs.values())
959 965 ):
960 966 return False
961 967 self._loadalllazy()
962 968 return not self._dirs or all(m._isempty() for m in self._dirs.values())
963 969
964 970 @encoding.strmethod
965 971 def __repr__(self) -> bytes:
966 972 return (
967 973 b'<treemanifest dir=%s, node=%s, loaded=%r, dirty=%r at 0x%x>'
968 974 % (
969 975 self._dir,
970 976 hex(self._node),
971 977 bool(self._loadfunc is _noop),
972 978 self._dirty,
973 979 id(self),
974 980 )
975 981 )
976 982
977 983 def dir(self) -> bytes:
978 984 """The directory that this tree manifest represents, including a
979 985 trailing '/'. Empty string for the repo root directory."""
980 986 return self._dir
981 987
982 988 def node(self) -> bytes:
983 989 """This node of this instance. nullid for unsaved instances. Should
984 990 be updated when the instance is read or written from a revlog.
985 991 """
986 992 assert not self._dirty
987 993 return self._node
988 994
989 995 def setnode(self, node: bytes) -> None:
990 996 self._node = node
991 997 self._dirty = False
992 998
993 999 def iterentries(
994 1000 self,
995 1001 ) -> Iterator[Tuple[bytes, Union[bytes, 'TreeManifest'], bytes]]:
996 1002 self._load()
997 1003 self._loadalllazy()
998 1004 for p, n in sorted(
999 1005 itertools.chain(self._dirs.items(), self._files.items())
1000 1006 ):
1001 1007 if p in self._files:
1002 1008 yield self._subpath(p), n, self._flags.get(p, b'')
1003 1009 else:
1004 1010 for x in n.iterentries():
1005 1011 yield x
1006 1012
1007 1013 def items(self) -> Iterator[Tuple[bytes, Union[bytes, 'TreeManifest']]]:
1008 1014 self._load()
1009 1015 self._loadalllazy()
1010 1016 for p, n in sorted(
1011 1017 itertools.chain(self._dirs.items(), self._files.items())
1012 1018 ):
1013 1019 if p in self._files:
1014 1020 yield self._subpath(p), n
1015 1021 else:
1016 1022 for f, sn in n.items():
1017 1023 yield f, sn
1018 1024
1019 1025 iteritems = items
1020 1026
1021 1027 def iterkeys(self) -> Iterator[bytes]:
1022 1028 self._load()
1023 1029 self._loadalllazy()
1024 1030 for p in sorted(itertools.chain(self._dirs, self._files)):
1025 1031 if p in self._files:
1026 1032 yield self._subpath(p)
1027 1033 else:
1028 1034 for f in self._dirs[p]:
1029 1035 yield f
1030 1036
1031 1037 def keys(self) -> List[bytes]:
1032 1038 return list(self.iterkeys())
1033 1039
1034 1040 def __iter__(self) -> Iterator[bytes]:
1035 1041 return self.iterkeys()
1036 1042
1037 1043 def __contains__(self, f: bytes) -> bool:
1038 1044 if f is None:
1039 1045 return False
1040 1046 self._load()
1041 1047 dir, subpath = _splittopdir(f)
1042 1048 if dir:
1043 1049 self._loadlazy(dir)
1044 1050
1045 1051 if dir not in self._dirs:
1046 1052 return False
1047 1053
1048 1054 return self._dirs[dir].__contains__(subpath)
1049 1055 else:
1050 1056 return f in self._files
1051 1057
1052 1058 def get(self, f: bytes, default: Optional[bytes] = None) -> Optional[bytes]:
1053 1059 self._load()
1054 1060 dir, subpath = _splittopdir(f)
1055 1061 if dir:
1056 1062 self._loadlazy(dir)
1057 1063
1058 1064 if dir not in self._dirs:
1059 1065 return default
1060 1066 return self._dirs[dir].get(subpath, default)
1061 1067 else:
1062 1068 return self._files.get(f, default)
1063 1069
1064 1070 def __getitem__(self, f: bytes) -> bytes:
1065 1071 self._load()
1066 1072 dir, subpath = _splittopdir(f)
1067 1073 if dir:
1068 1074 self._loadlazy(dir)
1069 1075
1070 1076 return self._dirs[dir].__getitem__(subpath)
1071 1077 else:
1072 1078 return self._files[f]
1073 1079
1074 1080 def flags(self, f: bytes) -> bytes:
1075 1081 self._load()
1076 1082 dir, subpath = _splittopdir(f)
1077 1083 if dir:
1078 1084 self._loadlazy(dir)
1079 1085
1080 1086 if dir not in self._dirs:
1081 1087 return b''
1082 1088 return self._dirs[dir].flags(subpath)
1083 1089 else:
1084 1090 if f in self._lazydirs or f in self._dirs:
1085 1091 return b''
1086 1092 return self._flags.get(f, b'')
1087 1093
1088 1094 def find(self, f: bytes) -> Tuple[bytes, bytes]:
1089 1095 self._load()
1090 1096 dir, subpath = _splittopdir(f)
1091 1097 if dir:
1092 1098 self._loadlazy(dir)
1093 1099
1094 1100 return self._dirs[dir].find(subpath)
1095 1101 else:
1096 1102 return self._files[f], self._flags.get(f, b'')
1097 1103
1098 1104 def __delitem__(self, f: bytes) -> None:
1099 1105 self._load()
1100 1106 dir, subpath = _splittopdir(f)
1101 1107 if dir:
1102 1108 self._loadlazy(dir)
1103 1109
1104 1110 self._dirs[dir].__delitem__(subpath)
1105 1111 # If the directory is now empty, remove it
1106 1112 if self._dirs[dir]._isempty():
1107 1113 del self._dirs[dir]
1108 1114 else:
1109 1115 del self._files[f]
1110 1116 if f in self._flags:
1111 1117 del self._flags[f]
1112 1118 self._dirty = True
1113 1119
1114 1120 def set(self, f: bytes, node: bytes, flags: bytes) -> None:
1115 1121 """Set both the node and the flags for path f."""
1116 1122 assert node is not None
1117 1123 if flags not in _manifestflags:
1118 1124 raise TypeError(b"Invalid manifest flag set.")
1119 1125 self._load()
1120 1126 dir, subpath = _splittopdir(f)
1121 1127 if dir:
1122 1128 self._loadlazy(dir)
1123 1129 if dir not in self._dirs:
1124 1130 self._dirs[dir] = treemanifest(
1125 1131 self.nodeconstants, self._subpath(dir)
1126 1132 )
1127 1133 self._dirs[dir].set(subpath, node, flags)
1128 1134 else:
1129 1135 assert len(node) in (20, 32)
1130 1136 self._files[f] = node
1131 1137 self._flags[f] = flags
1132 1138 self._dirty = True
1133 1139
1134 1140 def __setitem__(self, f: bytes, n: bytes) -> None:
1135 1141 assert n is not None
1136 1142 self._load()
1137 1143 dir, subpath = _splittopdir(f)
1138 1144 if dir:
1139 1145 self._loadlazy(dir)
1140 1146 if dir not in self._dirs:
1141 1147 self._dirs[dir] = treemanifest(
1142 1148 self.nodeconstants, self._subpath(dir)
1143 1149 )
1144 1150 self._dirs[dir].__setitem__(subpath, n)
1145 1151 else:
1146 1152 # manifest nodes are either 20 bytes or 32 bytes,
1147 1153 # depending on the hash in use. Assert this as historically
1148 1154 # sometimes extra bytes were added.
1149 1155 assert len(n) in (20, 32)
1150 1156 self._files[f] = n
1151 1157 self._dirty = True
1152 1158
1153 1159 def _load(self) -> None:
1154 1160 if self._loadfunc is not _noop:
1155 1161 lf, self._loadfunc = self._loadfunc, _noop
1156 1162 lf(self)
1157 1163 elif self._copyfunc is not _noop:
1158 1164 cf, self._copyfunc = self._copyfunc, _noop
1159 1165 cf(self)
1160 1166
1161 1167 def setflag(self, f: bytes, flags: bytes) -> None:
1162 1168 """Set the flags (symlink, executable) for path f."""
1163 1169 if flags not in _manifestflags:
1164 1170 raise TypeError(b"Invalid manifest flag set.")
1165 1171 self._load()
1166 1172 dir, subpath = _splittopdir(f)
1167 1173 if dir:
1168 1174 self._loadlazy(dir)
1169 1175 if dir not in self._dirs:
1170 1176 self._dirs[dir] = treemanifest(
1171 1177 self.nodeconstants, self._subpath(dir)
1172 1178 )
1173 1179 self._dirs[dir].setflag(subpath, flags)
1174 1180 else:
1175 1181 self._flags[f] = flags
1176 1182 self._dirty = True
1177 1183
1178 1184 def copy(self) -> 'TreeManifest':
1179 1185 copy = treemanifest(self.nodeconstants, self._dir)
1180 1186 copy._node = self._node
1181 1187 copy._dirty = self._dirty
1182 1188 if self._copyfunc is _noop:
1183 1189
1184 1190 def _copyfunc(s):
1185 1191 self._load()
1186 1192 s._lazydirs = {
1187 1193 d: (n, r, True) for d, (n, r, c) in self._lazydirs.items()
1188 1194 }
1189 1195 sdirs = s._dirs
1190 1196 for d, v in self._dirs.items():
1191 1197 sdirs[d] = v.copy()
1192 1198 s._files = dict.copy(self._files)
1193 1199 s._flags = dict.copy(self._flags)
1194 1200
1195 1201 if self._loadfunc is _noop:
1196 1202 _copyfunc(copy)
1197 1203 else:
1198 1204 copy._copyfunc = _copyfunc
1199 1205 else:
1200 1206 copy._copyfunc = self._copyfunc
1201 1207 return copy
1202 1208
1203 1209 def filesnotin(
1204 1210 self, m2: 'TreeManifest', match: Optional[matchmod.basematcher] = None
1205 1211 ) -> Set[bytes]:
1206 1212 '''Set of files in this manifest that are not in the other'''
1207 1213 if match and not match.always():
1208 1214 m1 = self._matches(match)
1209 1215 m2 = m2._matches(match)
1210 1216 return m1.filesnotin(m2)
1211 1217
1212 1218 files = set()
1213 1219
1214 1220 def _filesnotin(t1, t2):
1215 1221 if t1._node == t2._node and not t1._dirty and not t2._dirty:
1216 1222 return
1217 1223 t1._load()
1218 1224 t2._load()
1219 1225 self._loaddifflazy(t1, t2)
1220 1226 for d, m1 in t1._dirs.items():
1221 1227 if d in t2._dirs:
1222 1228 m2 = t2._dirs[d]
1223 1229 _filesnotin(m1, m2)
1224 1230 else:
1225 1231 files.update(m1.iterkeys())
1226 1232
1227 1233 for fn in t1._files:
1228 1234 if fn not in t2._files:
1229 1235 files.add(t1._subpath(fn))
1230 1236
1231 1237 _filesnotin(self, m2)
1232 1238 return files
1233 1239
1234 1240 @propertycache
1235 1241 def _alldirs(self) -> pathutil.dirs:
1236 1242 return pathutil.dirs(self)
1237 1243
1238 1244 def dirs(self) -> pathutil.dirs:
1239 1245 return self._alldirs
1240 1246
1241 1247 def hasdir(self, dir: bytes) -> bool:
1242 1248 self._load()
1243 1249 topdir, subdir = _splittopdir(dir)
1244 1250 if topdir:
1245 1251 self._loadlazy(topdir)
1246 1252 if topdir in self._dirs:
1247 1253 return self._dirs[topdir].hasdir(subdir)
1248 1254 return False
1249 1255 dirslash = dir + b'/'
1250 1256 return dirslash in self._dirs or dirslash in self._lazydirs
1251 1257
1252 1258 def walk(self, match: matchmod.basematcher) -> Iterator[bytes]:
1253 1259 """Generates matching file names.
1254 1260
1255 1261 It also reports nonexistent files by marking them bad with match.bad().
1256 1262 """
1257 1263 if match.always():
1258 1264 for f in iter(self):
1259 1265 yield f
1260 1266 return
1261 1267
1262 1268 fset = set(match.files())
1263 1269
1264 1270 for fn in self._walk(match):
1265 1271 if fn in fset:
1266 1272 # specified pattern is the exact name
1267 1273 fset.remove(fn)
1268 1274 yield fn
1269 1275
1270 1276 # for dirstate.walk, files=[''] means "walk the whole tree".
1271 1277 # follow that here, too
1272 1278 fset.discard(b'')
1273 1279
1274 1280 for fn in sorted(fset):
1275 1281 if not self.hasdir(fn):
1276 1282 match.bad(fn, None)
1277 1283
1278 1284 def _walk(self, match: matchmod.basematcher) -> Iterator[bytes]:
1279 1285 '''Recursively generates matching file names for walk().'''
1280 1286 visit = match.visitchildrenset(self._dir[:-1])
1281 1287 if not visit:
1282 1288 return
1283 1289
1284 1290 # yield this dir's files and walk its submanifests
1285 1291 self._load()
1286 1292 visit = self._loadchildrensetlazy(visit)
1287 1293 for p in sorted(list(self._dirs) + list(self._files)):
1288 1294 if p in self._files:
1289 1295 fullp = self._subpath(p)
1290 1296 if match(fullp):
1291 1297 yield fullp
1292 1298 else:
1293 1299 if not visit or p[:-1] in visit:
1294 1300 for f in self._dirs[p]._walk(match):
1295 1301 yield f
1296 1302
1297 1303 def _matches(self, match: matchmod.basematcher) -> 'TreeManifest':
1298 1304 """recursively generate a new manifest filtered by the match argument."""
1299 1305 if match.always():
1300 1306 return self.copy()
1301 1307 return self._matches_inner(match)
1302 1308
1303 1309 def _matches_inner(self, match: matchmod.basematcher) -> 'TreeManifest':
1304 1310 if match.always():
1305 1311 return self.copy()
1306 1312
1307 1313 visit = match.visitchildrenset(self._dir[:-1])
1308 1314 if visit == b'all':
1309 1315 return self.copy()
1310 1316 ret = treemanifest(self.nodeconstants, self._dir)
1311 1317 if not visit:
1312 1318 return ret
1313 1319
1314 1320 self._load()
1315 1321 for fn in self._files:
1316 1322 # While visitchildrenset *usually* lists only subdirs, this is
1317 1323 # actually up to the matcher and may have some files in the set().
1318 1324 # If visit == 'this', we should obviously look at the files in this
1319 1325 # directory; if visit is a set, and fn is in it, we should inspect
1320 1326 # fn (but no need to inspect things not in the set).
1321 1327 if visit != b'this' and fn not in visit:
1322 1328 continue
1323 1329 fullp = self._subpath(fn)
1324 1330 # visitchildrenset isn't perfect, we still need to call the regular
1325 1331 # matcher code to further filter results.
1326 1332 if not match(fullp):
1327 1333 continue
1328 1334 ret._files[fn] = self._files[fn]
1329 1335 if fn in self._flags:
1330 1336 ret._flags[fn] = self._flags[fn]
1331 1337
1332 1338 visit = self._loadchildrensetlazy(visit)
1333 1339 for dir, subm in self._dirs.items():
1334 1340 if visit and dir[:-1] not in visit:
1335 1341 continue
1336 1342 m = subm._matches_inner(match)
1337 1343 if not m._isempty():
1338 1344 ret._dirs[dir] = m
1339 1345
1340 1346 if not ret._isempty():
1341 1347 ret._dirty = True
1342 1348 return ret
1343 1349
1344 1350 def fastdelta(
1345 1351 self, base: ByteString, changes: Iterable[Tuple[bytes, bool]]
1346 1352 ) -> ByteString:
1347 1353 raise FastdeltaUnavailable()
1348 1354
1349 1355 def diff(
1350 1356 self,
1351 1357 m2: 'TreeManifest',
1352 1358 match: Optional[matchmod.basematcher] = None,
1353 1359 clean: bool = False,
1354 1360 ) -> Dict[
1355 1361 bytes,
1356 1362 Optional[
1357 1363 Tuple[Tuple[Optional[bytes], bytes], Tuple[Optional[bytes], bytes]]
1358 1364 ],
1359 1365 ]:
1360 1366 """Finds changes between the current manifest and m2.
1361 1367
1362 1368 Args:
1363 1369 m2: the manifest to which this manifest should be compared.
1364 1370 clean: if true, include files unchanged between these manifests
1365 1371 with a None value in the returned dictionary.
1366 1372
1367 1373 The result is returned as a dict with filename as key and
1368 1374 values of the form ((n1,fl1),(n2,fl2)), where n1/n2 is the
1369 1375 nodeid in the current/other manifest and fl1/fl2 is the flag
1370 1376 in the current/other manifest. Where the file does not exist,
1371 1377 the nodeid will be None and the flags will be the empty
1372 1378 string.
1373 1379 """
1374 1380 if match and not match.always():
1375 1381 m1 = self._matches(match)
1376 1382 m2 = m2._matches(match)
1377 1383 return m1.diff(m2, clean=clean)
1378 1384 result = {}
1379 1385 emptytree = treemanifest(self.nodeconstants)
1380 1386
1381 1387 def _iterativediff(t1, t2, stack):
1382 1388 """compares two tree manifests and appends new tree manifests that
1383 1389 still need to be compared onto the stack"""
1384 1390 if t1._node == t2._node and not t1._dirty and not t2._dirty:
1385 1391 return
1386 1392 t1._load()
1387 1393 t2._load()
1388 1394 self._loaddifflazy(t1, t2)
1389 1395
1390 1396 for d, m1 in t1._dirs.items():
1391 1397 m2 = t2._dirs.get(d, emptytree)
1392 1398 stack.append((m1, m2))
1393 1399
1394 1400 for d, m2 in t2._dirs.items():
1395 1401 if d not in t1._dirs:
1396 1402 stack.append((emptytree, m2))
1397 1403
1398 1404 for fn, n1 in t1._files.items():
1399 1405 fl1 = t1._flags.get(fn, b'')
1400 1406 n2 = t2._files.get(fn, None)
1401 1407 fl2 = t2._flags.get(fn, b'')
1402 1408 if n1 != n2 or fl1 != fl2:
1403 1409 result[t1._subpath(fn)] = ((n1, fl1), (n2, fl2))
1404 1410 elif clean:
1405 1411 result[t1._subpath(fn)] = None
1406 1412
1407 1413 for fn, n2 in t2._files.items():
1408 1414 if fn not in t1._files:
1409 1415 fl2 = t2._flags.get(fn, b'')
1410 1416 result[t2._subpath(fn)] = ((None, b''), (n2, fl2))
1411 1417
1412 1418 stackls = []
1413 1419 _iterativediff(self, m2, stackls)
1414 1420 while stackls:
1415 1421 t1, t2 = stackls.pop()
1416 1422 # stackls is populated in the function call
1417 1423 _iterativediff(t1, t2, stackls)
1418 1424 return result
1419 1425
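# Note on the implementation above (not part of the original file): diff()
# avoids recursion by pushing (subtree1, subtree2) pairs onto a list and
# popping until it is empty; subtrees whose nodes match and that are not
# dirty are skipped outright, which is what keeps tree manifest comparison
# cheap for mostly-unchanged trees.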
1420 1426 def unmodifiedsince(self, m2: 'TreeManifest') -> bool:
1421 1427 return not self._dirty and not m2._dirty and self._node == m2._node
1422 1428
1423 1429 def parse(
1424 1430 self,
1425 1431 text: bytes,
1426 1432 readsubtree: Callable[[bytes, bytes], 'TreeManifest'],
1427 1433 ) -> None:
1428 1434 selflazy = self._lazydirs
1429 1435 for f, n, fl in _parse(self._nodelen, text):
1430 1436 if fl == b't':
1431 1437 f = f + b'/'
1432 1438 # False below means "doesn't need to be copied" and can use the
1433 1439 # cached value from readsubtree directly.
1434 1440 selflazy[f] = (n, readsubtree, False)
1435 1441 elif b'/' in f:
1436 1442 # This is a flat manifest, so use __setitem__ and setflag rather
1437 1443 # than assigning directly to _files and _flags, so that we can
1438 1444 # assign a path in a subdirectory and mark it dirty (compared
1439 1445 # to nullid).
1440 1446 self[f] = n
1441 1447 if fl:
1442 1448 self.setflag(f, fl)
1443 1449 else:
1444 1450 # Assigning to _files and _flags avoids marking as dirty,
1445 1451 # and should be a little faster.
1446 1452 self._files[f] = n
1447 1453 if fl:
1448 1454 self._flags[f] = fl
1449 1455
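# A minimal illustrative sketch (not part of the original file) of the text
# parsed above; each line is "<path>\0<hex-node><flags>\n" and a b't' flag
# marks a subdirectory whose loading is deferred through _lazydirs
# (hex_subtree_node and hex_file_node are placeholder names):
#
#   b"foo\x00" + hex_subtree_node + b"t\n"   # becomes lazy subtree entry b'foo/'
#   b"baz.py\x00" + hex_file_node + b"x\n"   # becomes executable file b'baz.py'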
1450 1456 def text(self) -> ByteString:
1451 1457 """Get the full data of this manifest as a bytestring."""
1452 1458 self._load()
1453 1459 return _text(self.iterentries())
1454 1460
1455 1461 def dirtext(self) -> ByteString:
1456 1462 """Get the full data of this directory as a bytestring. Make sure that
1457 1463 any submanifests have been written first, so their nodeids are correct.
1458 1464 """
1459 1465 self._load()
1460 1466 flags = self.flags
1461 1467 lazydirs = [(d[:-1], v[0], b't') for d, v in self._lazydirs.items()]
1462 1468 dirs = [(d[:-1], self._dirs[d]._node, b't') for d in self._dirs]
1463 1469 files = [(f, self._files[f], flags(f)) for f in self._files]
1464 1470 return _text(sorted(dirs + files + lazydirs))
1465 1471
1466 1472 def read(
1467 1473 self,
1468 1474 gettext: Callable[[], ByteString],
1469 1475 readsubtree: Callable[[bytes, bytes], 'TreeManifest'],
1470 1476 ) -> None:
1471 1477 def _load_for_read(s):
1472 1478 s.parse(gettext(), readsubtree)
1473 1479 s._dirty = False
1474 1480
1475 1481 self._loadfunc = _load_for_read
1476 1482
1477 1483 def writesubtrees(
1478 1484 self,
1479 1485 m1: 'TreeManifest',
1480 1486 m2: 'TreeManifest',
1481 1487 writesubtree: Callable[
1482 1488 [
1483 1489 Callable[['TreeManifest'], None],
1484 1490 bytes,
1485 1491 bytes,
1486 1492 matchmod.basematcher,
1487 1493 ],
1488 1494 None,
1489 1495 ],
1490 1496 match: matchmod.basematcher,
1491 1497 ) -> None:
1492 1498 self._load() # for consistency; should never have any effect here
1493 1499 m1._load()
1494 1500 m2._load()
1495 1501 emptytree = treemanifest(self.nodeconstants)
1496 1502
1497 1503 def getnode(m, d):
1498 1504 ld = m._lazydirs.get(d)
1499 1505 if ld:
1500 1506 return ld[0]
1501 1507 tree = m._dirs.get(d, emptytree)
1502 1508 assert tree is not None # helps pytype
1503 1509 return tree._node
1504 1510
1505 1511 # let's skip investigating things that `match` says we do not need.
1506 1512 visit = match.visitchildrenset(self._dir[:-1])
1507 1513 visit = self._loadchildrensetlazy(visit)
1508 1514 if visit == b'this' or visit == b'all':
1509 1515 visit = None
1510 1516 for d, subm in self._dirs.items():
1511 1517 if visit and d[:-1] not in visit:
1512 1518 continue
1513 1519 subp1 = getnode(m1, d)
1514 1520 subp2 = getnode(m2, d)
1515 1521 if subp1 == self.nodeconstants.nullid:
1516 1522 subp1, subp2 = subp2, subp1
1517 1523 writesubtree(subm, subp1, subp2, match)
1518 1524
1519 1525 def walksubtrees(
1520 1526 self, matcher: Optional[matchmod.basematcher] = None
1521 1527 ) -> Iterator['TreeManifest']:
1522 1528 """Returns an iterator of the subtrees of this manifest, including this
1523 1529 manifest itself.
1524 1530
1525 1531 If `matcher` is provided, it only returns subtrees that match.
1526 1532 """
1527 1533 if matcher and not matcher.visitdir(self._dir[:-1]):
1528 1534 return
1529 1535 if not matcher or matcher(self._dir[:-1]):
1530 1536 yield self
1531 1537
1532 1538 self._load()
1533 1539 # OPT: use visitchildrenset to avoid loading everything.
1534 1540 self._loadalllazy()
1535 1541 for d, subm in self._dirs.items():
1536 1542 for subtree in subm.walksubtrees(matcher=matcher):
1537 1543 yield subtree
1538 1544
1539 1545
1540 1546 treemanifest = interfaceutil.implementer(repository.imanifestdict)(TreeManifest)
1541 1547
1542 1548 if typing.TYPE_CHECKING:
1543 1549 treemanifest = TreeManifest
1544 1550
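# A minimal illustrative sketch (not part of the original file) of basic
# treemanifest usage; sha1nodeconstants (from mercurial.node) stands in for the
# repository's node constants and is an assumption here:
#
#   tm = treemanifest(sha1nodeconstants)
#   tm[b'dir/sub/file.py'] = b'\x01' * 20   # creates nested b'dir/' and b'dir/sub/' trees
#   tm.setflag(b'dir/sub/file.py', b'x')
#   tm.hasdir(b'dir/sub')                   # -> True
#   [t.dir() for t in tm.walksubtrees()]    # -> [b'', b'dir/', b'dir/sub/']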
1545 1551
1546 1552 class manifestfulltextcache(util.lrucachedict):
1547 1553 """File-backed LRU cache for the manifest cache
1548 1554
1549 1555 File consists of entries, up to EOF:
1550 1556
1551 1557 - 20 bytes node, 4 bytes length, <length> manifest data
1552 1558
1553 1559 These are written in reverse cache order (oldest to newest).
1554 1560
1555 1561 """
1556 1562
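# A minimal illustrative sketch (not part of the original file) of one on-disk
# record; read() keeps consuming such records until a short read, so trailing
# corruption simply truncates the cache rather than failing:
#
#   record = node + struct.pack(b'>L', len(text)) + text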
1557 1563 _file = b'manifestfulltextcache'
1558 1564
1559 1565 def __init__(self, max):
1560 1566 super(manifestfulltextcache, self).__init__(max)
1561 1567 self._dirty = False
1562 1568 self._read = False
1563 1569 self._opener = None
1564 1570
1565 1571 def read(self):
1566 1572 if self._read or self._opener is None:
1567 1573 return
1568 1574
1569 1575 try:
1570 1576 with self._opener(self._file) as fp:
1571 1577 set = super(manifestfulltextcache, self).__setitem__
1572 1578 # ignore trailing data, this is a cache, corruption is skipped
1573 1579 while True:
1574 1580 # TODO do we need to do work here for sha1 portability?
1575 1581 node = fp.read(20)
1576 1582 if len(node) < 20:
1577 1583 break
1578 1584 try:
1579 1585 size = struct.unpack(b'>L', fp.read(4))[0]
1580 1586 except struct.error:
1581 1587 break
1582 1588 value = bytearray(fp.read(size))
1583 1589 if len(value) != size:
1584 1590 break
1585 1591 set(node, value)
1586 1592 except IOError:
1587 1593 # the file is allowed to be missing
1588 1594 pass
1589 1595
1590 1596 self._read = True
1591 1597 self._dirty = False
1592 1598
1593 1599 def write(self):
1594 1600 if not self._dirty or self._opener is None:
1595 1601 return
1596 1602 # rotate backwards to the first used node
1597 1603 try:
1598 1604 with self._opener(
1599 1605 self._file, b'w', atomictemp=True, checkambig=True
1600 1606 ) as fp:
1601 1607 node = self._head.prev
1602 1608 while True:
1603 1609 if node.key in self._cache:
1604 1610 fp.write(node.key)
1605 1611 fp.write(struct.pack(b'>L', len(node.value)))
1606 1612 fp.write(node.value)
1607 1613 if node is self._head:
1608 1614 break
1609 1615 node = node.prev
1610 1616 except IOError:
1611 1617 # We could not write the cache (eg: permission error)
1612 1618 # the content can be missing.
1613 1619 #
1614 1620 # We could try harder and see if we could recreate a wcache
1615 1621 # directory where we could write to.
1616 1622 #
1617 1623 # XXX the error passes silently; having some way to issue an error
1618 1624 # log via `ui.log` would be nice.
1619 1625 pass
1620 1626
1621 1627 def __len__(self):
1622 1628 if not self._read:
1623 1629 self.read()
1624 1630 return super(manifestfulltextcache, self).__len__()
1625 1631
1626 1632 def __contains__(self, k):
1627 1633 if not self._read:
1628 1634 self.read()
1629 1635 return super(manifestfulltextcache, self).__contains__(k)
1630 1636
1631 1637 def __iter__(self):
1632 1638 if not self._read:
1633 1639 self.read()
1634 1640 return super(manifestfulltextcache, self).__iter__()
1635 1641
1636 1642 def __getitem__(self, k):
1637 1643 if not self._read:
1638 1644 self.read()
1639 1645 # the cache lru order can change on read
1640 1646 setdirty = self._cache.get(k) is not self._head
1641 1647 value = super(manifestfulltextcache, self).__getitem__(k)
1642 1648 if setdirty:
1643 1649 self._dirty = True
1644 1650 return value
1645 1651
1646 1652 def __setitem__(self, k, v):
1647 1653 if not self._read:
1648 1654 self.read()
1649 1655 super(manifestfulltextcache, self).__setitem__(k, v)
1650 1656 self._dirty = True
1651 1657
1652 1658 def __delitem__(self, k):
1653 1659 if not self._read:
1654 1660 self.read()
1655 1661 super(manifestfulltextcache, self).__delitem__(k)
1656 1662 self._dirty = True
1657 1663
1658 1664 def get(self, k, default=None):
1659 1665 if not self._read:
1660 1666 self.read()
1661 1667 return super(manifestfulltextcache, self).get(k, default=default)
1662 1668
1663 1669 def clear(self, clear_persisted_data=False):
1664 1670 super(manifestfulltextcache, self).clear()
1665 1671 if clear_persisted_data:
1666 1672 self._dirty = True
1667 1673 self.write()
1668 1674 self._read = False
1669 1675
1670 1676
1671 1677 # an upper bound of what we expect from compression
1672 1678 # (real live value seems to be "3")
1673 1679 MAXCOMPRESSION = 3
1674 1680
1675 1681
1676 1682 class FastdeltaUnavailable(Exception):
1677 1683 """Exception raised when fastdelta isn't usable on a manifest."""
1678 1684
1679 1685
1680 1686 class ManifestRevlog:
1681 1687 """A revlog that stores manifest texts. This is responsible for caching the
1682 1688 full-text manifest contents.
1683 1689 """
1684 1690
1685 1691 def __init__(
1686 1692 self,
1687 1693 nodeconstants,
1688 1694 opener,
1689 1695 tree=b'',
1690 1696 dirlogcache=None,
1691 1697 treemanifest=False,
1692 1698 ):
1693 1699 """Constructs a new manifest revlog
1694 1700
1695 1701 `indexfile` - used by extensions to have two manifests at once, like
1696 1702 when transitioning between flat manifests and tree manifests.
1697 1703
1698 1704 `treemanifest` - used to indicate this is a tree manifest revlog. Opener
1699 1705 options can also be used to make this a tree manifest revlog. The opener
1700 1706 option takes precedence, so if it is set to True, we ignore whatever
1701 1707 value is passed in to the constructor.
1702 1708 """
1703 1709 self.nodeconstants = nodeconstants
1704 1710 # During normal operations, we expect to deal with not more than four
1705 1711 # revs at a time (such as during commit --amend). When rebasing large
1706 1712 # stacks of commits, the number can go up, hence the config knob below.
1707 1713 cachesize = 4
1708 1714 optiontreemanifest = False
1709 1715 persistentnodemap = False
1710 1716 opts = getattr(opener, 'options', None)
1711 1717 if opts is not None:
1712 1718 cachesize = opts.get(b'manifestcachesize', cachesize)
1713 1719 optiontreemanifest = opts.get(b'treemanifest', False)
1714 1720 persistentnodemap = opts.get(b'persistent-nodemap', False)
1715 1721
1716 1722 self._treeondisk = optiontreemanifest or treemanifest
1717 1723
1718 1724 self._fulltextcache = manifestfulltextcache(cachesize)
1719 1725
1720 1726 if tree:
1721 1727 assert self._treeondisk, (tree, b'opts is %r' % opts)
1722 1728
1723 1729 radix = b'00manifest'
1724 1730 if tree:
1725 1731 radix = b"meta/" + tree + radix
1726 1732
1727 1733 self.tree = tree
1728 1734
1729 1735 # The dirlogcache is kept on the root manifest log
1730 1736 if tree:
1731 1737 self._dirlogcache = dirlogcache
1732 1738 else:
1733 1739 self._dirlogcache = {b'': self}
1734 1740
1735 1741 self._revlog = revlog.revlog(
1736 1742 opener,
1737 1743 target=(revlog_constants.KIND_MANIFESTLOG, self.tree),
1738 1744 radix=radix,
1739 1745 # only root indexfile is cached
1740 1746 checkambig=not bool(tree),
1741 1747 mmaplargeindex=True,
1742 1748 upperboundcomp=MAXCOMPRESSION,
1743 1749 persistentnodemap=persistentnodemap,
1744 1750 )
1745 1751
1746 1752 self.index = self._revlog.index
1747 1753
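# Note on the constructor above (not part of the original file): the tunables
# come from the opener's options, roughly
#
#   opener.options = {
#       b'manifestcachesize': 4,        # size of the fulltext LRU cache
#       b'treemanifest': False,         # one revlog per directory when True
#       b'persistent-nodemap': False,   # keep the nodemap on disk
#   }
#
# and a non-empty `tree` switches the radix to b"meta/<tree>00manifest".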
1748 1754 def get_revlog(self):
1749 1755 """return an actual revlog instance if any
1750 1756
1751 1757 This exists because a lot of code leverages the fact that the underlying
1752 1758 storage is a revlog for optimization, so giving a simple way to access
1753 1759 the revlog instance helps such code.
1754 1760 """
1755 1761 return self._revlog
1756 1762
1757 1763 def _setupmanifestcachehooks(self, repo):
1758 1764 """Persist the manifestfulltextcache on lock release"""
1759 1765 if not hasattr(repo, '_wlockref'):
1760 1766 return
1761 1767
1762 1768 self._fulltextcache._opener = repo.wcachevfs
1763 1769 if repo._currentlock(repo._wlockref) is None:
1764 1770 return
1765 1771
1766 1772 reporef = weakref.ref(repo)
1767 1773 manifestrevlogref = weakref.ref(self)
1768 1774
1769 1775 def persistmanifestcache(success):
1770 1776 # Repo is in an unknown state, do not persist.
1771 1777 if not success:
1772 1778 return
1773 1779
1774 1780 repo = reporef()
1775 1781 self = manifestrevlogref()
1776 1782 if repo is None or self is None:
1777 1783 return
1778 1784 if repo.manifestlog.getstorage(b'') is not self:
1779 1785 # there's a different manifest in play now, abort
1780 1786 return
1781 1787 self._fulltextcache.write()
1782 1788
1783 1789 repo._afterlock(persistmanifestcache)
1784 1790
1785 1791 @property
1786 1792 def fulltextcache(self):
1787 1793 return self._fulltextcache
1788 1794
1789 1795 def clearcaches(self, clear_persisted_data=False):
1790 1796 self._revlog.clearcaches()
1791 1797 self._fulltextcache.clear(clear_persisted_data=clear_persisted_data)
1792 1798 self._dirlogcache = {self.tree: self}
1793 1799
1794 1800 def dirlog(self, d):
1795 1801 if d:
1796 1802 assert self._treeondisk
1797 1803 if d not in self._dirlogcache:
1798 1804 mfrevlog = manifestrevlog(
1799 1805 self.nodeconstants,
1800 1806 self.opener,
1801 1807 d,
1802 1808 self._dirlogcache,
1803 1809 treemanifest=self._treeondisk,
1804 1810 )
1805 1811 self._dirlogcache[d] = mfrevlog
1806 1812 return self._dirlogcache[d]
1807 1813
1808 1814 def add(
1809 1815 self,
1810 1816 m,
1811 1817 transaction,
1812 1818 link,
1813 1819 p1,
1814 1820 p2,
1815 1821 added: Iterable[bytes],
1816 1822 removed: Iterable[bytes],
1817 1823 readtree=None,
1818 1824 match=None,
1819 1825 ):
1820 1826 """add a manifest entry into the manifest log
1821 1827
1822 1828 input:
1823 1829
1824 1830 m: the manifest dict we want to store
1825 1831 transaction: the open transaction
1826 1832 p1: manifest-node of p1
1827 1833 p2: manifest-node of p2
1828 1834 added: files added/changed compared to the parent
1829 1835 removed: files removed compared to the parent
1830 1836
1831 1837 tree manifest input:
1832 1838
1833 1839 readtree: a function to read a subtree
1834 1840 match: a filematcher for the subpart of the tree manifest
1835 1841 """
1836 1842 try:
1837 1843 if p1 not in self.fulltextcache:
1838 1844 raise FastdeltaUnavailable()
1839 1845 # If our first parent is in the manifest cache, we can
1840 1846 # compute a delta here using properties we know about the
1841 1847 # manifest up-front, which may save time later for the
1842 1848 # revlog layer.
1843 1849
1844 1850 _checkforbidden(added)
1845 1851 # combine the changed lists into one sorted iterator
1846 1852 work = heapq.merge(
1847 1853 [(x, False) for x in sorted(added)],
1848 1854 [(x, True) for x in sorted(removed)],
1849 1855 )
1850 1856
1851 1857 arraytext, deltatext = m.fastdelta(self.fulltextcache[p1], work)
1852 1858 cachedelta = self._revlog.rev(p1), deltatext
1853 1859 text = util.buffer(arraytext)
1854 1860 rev = self._revlog.addrevision(
1855 1861 text, transaction, link, p1, p2, cachedelta
1856 1862 )
1857 1863 n = self._revlog.node(rev)
1858 1864 except FastdeltaUnavailable:
1859 1865 # The first parent manifest isn't already loaded or the
1860 1866 # manifest implementation doesn't support fastdelta, so
1861 1867 # we'll just encode a fulltext of the manifest and pass
1862 1868 # that through to the revlog layer, and let it handle the
1863 1869 # delta process.
1864 1870 if self._treeondisk:
1865 1871 assert readtree, b"readtree must be set for treemanifest writes"
1866 1872 assert match, b"match must be specified for treemanifest writes"
1867 1873 m1 = readtree(self.tree, p1)
1868 1874 m2 = readtree(self.tree, p2)
1869 1875 n = self._addtree(
1870 1876 m, transaction, link, m1, m2, readtree, match=match
1871 1877 )
1872 1878 arraytext = None
1873 1879 else:
1874 1880 text = m.text()
1875 1881 rev = self._revlog.addrevision(text, transaction, link, p1, p2)
1876 1882 n = self._revlog.node(rev)
1877 1883 arraytext = bytearray(text)
1878 1884
1879 1885 if arraytext is not None:
1880 1886 self.fulltextcache[n] = arraytext
1881 1887
1882 1888 return n
1883 1889
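# Note on add() above (not part of the original file): there are two paths.
# When p1's full text is already in the fulltext cache, a cheap line-level
# delta is built with m.fastdelta() and handed to the revlog as
# (base rev, deltatext); otherwise, and always for tree manifests, the full
# text is encoded and the revlog computes its own delta.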
1884 1890 def _addtree(self, m, transaction, link, m1, m2, readtree, match):
1885 1891 # If the manifest is unchanged compared to one parent,
1886 1892 # don't write a new revision
1887 1893 if self.tree != b'' and (
1888 1894 m.unmodifiedsince(m1) or m.unmodifiedsince(m2)
1889 1895 ):
1890 1896 return m.node()
1891 1897
1892 1898 def writesubtree(subm, subp1, subp2, match):
1893 1899 sublog = self.dirlog(subm.dir())
1894 1900 sublog.add(
1895 1901 subm,
1896 1902 transaction,
1897 1903 link,
1898 1904 subp1,
1899 1905 subp2,
1900 1906 None,
1901 1907 None,
1902 1908 readtree=readtree,
1903 1909 match=match,
1904 1910 )
1905 1911
1906 1912 m.writesubtrees(m1, m2, writesubtree, match)
1907 1913 text = m.dirtext()
1908 1914 n = None
1909 1915 if self.tree != b'':
1910 1916 # Double-check whether contents are unchanged compared to one parent
1911 1917 if text == m1.dirtext():
1912 1918 n = m1.node()
1913 1919 elif text == m2.dirtext():
1914 1920 n = m2.node()
1915 1921
1916 1922 if not n:
1917 1923 rev = self._revlog.addrevision(
1918 1924 text, transaction, link, m1.node(), m2.node()
1919 1925 )
1920 1926 n = self._revlog.node(rev)
1921 1927
1922 1928 # Save nodeid so parent manifest can calculate its nodeid
1923 1929 m.setnode(n)
1924 1930 return n
1925 1931
1926 1932 def __len__(self):
1927 1933 return len(self._revlog)
1928 1934
1929 1935 def __iter__(self):
1930 1936 return self._revlog.__iter__()
1931 1937
1932 1938 def rev(self, node):
1933 1939 return self._revlog.rev(node)
1934 1940
1935 1941 def node(self, rev):
1936 1942 return self._revlog.node(rev)
1937 1943
1938 1944 def lookup(self, value):
1939 1945 return self._revlog.lookup(value)
1940 1946
1941 1947 def parentrevs(self, rev):
1942 1948 return self._revlog.parentrevs(rev)
1943 1949
1944 1950 def parents(self, node):
1945 1951 return self._revlog.parents(node)
1946 1952
1947 1953 def linkrev(self, rev):
1948 1954 return self._revlog.linkrev(rev)
1949 1955
1950 1956 def checksize(self):
1951 1957 return self._revlog.checksize()
1952 1958
1953 1959 def revision(self, node):
1954 1960 return self._revlog.revision(node)
1955 1961
1956 1962 def rawdata(self, node):
1957 1963 return self._revlog.rawdata(node)
1958 1964
1959 1965 def revdiff(self, rev1, rev2):
1960 1966 return self._revlog.revdiff(rev1, rev2)
1961 1967
1962 1968 def cmp(self, node, text):
1963 1969 return self._revlog.cmp(node, text)
1964 1970
1965 1971 def deltaparent(self, rev):
1966 1972 return self._revlog.deltaparent(rev)
1967 1973
1968 1974 def emitrevisions(
1969 1975 self,
1970 1976 nodes,
1971 1977 nodesorder=None,
1972 1978 revisiondata=False,
1973 1979 assumehaveparentrevisions=False,
1974 1980 deltamode=repository.CG_DELTAMODE_STD,
1975 1981 sidedata_helpers=None,
1976 1982 debug_info=None,
1977 1983 ):
1978 1984 return self._revlog.emitrevisions(
1979 1985 nodes,
1980 1986 nodesorder=nodesorder,
1981 1987 revisiondata=revisiondata,
1982 1988 assumehaveparentrevisions=assumehaveparentrevisions,
1983 1989 deltamode=deltamode,
1984 1990 sidedata_helpers=sidedata_helpers,
1985 1991 debug_info=debug_info,
1986 1992 )
1987 1993
1988 1994 def addgroup(
1989 1995 self,
1990 1996 deltas,
1991 1997 linkmapper,
1992 1998 transaction,
1993 1999 alwayscache=False,
1994 2000 addrevisioncb=None,
1995 2001 duplicaterevisioncb=None,
1996 2002 debug_info=None,
1997 2003 delta_base_reuse_policy=None,
1998 2004 ):
1999 2005 return self._revlog.addgroup(
2000 2006 deltas,
2001 2007 linkmapper,
2002 2008 transaction,
2003 2009 alwayscache=alwayscache,
2004 2010 addrevisioncb=addrevisioncb,
2005 2011 duplicaterevisioncb=duplicaterevisioncb,
2006 2012 debug_info=debug_info,
2007 2013 delta_base_reuse_policy=delta_base_reuse_policy,
2008 2014 )
2009 2015
2010 2016 def rawsize(self, rev):
2011 2017 return self._revlog.rawsize(rev)
2012 2018
2013 2019 def getstrippoint(self, minlink):
2014 2020 return self._revlog.getstrippoint(minlink)
2015 2021
2016 2022 def strip(self, minlink, transaction):
2017 2023 return self._revlog.strip(minlink, transaction)
2018 2024
2019 2025 def files(self):
2020 2026 return self._revlog.files()
2021 2027
2022 2028 def clone(self, tr, destrevlog, **kwargs):
2023 2029 if not isinstance(destrevlog, manifestrevlog):
2024 2030 raise error.ProgrammingError(b'expected manifestrevlog to clone()')
2025 2031
2026 2032 return self._revlog.clone(tr, destrevlog._revlog, **kwargs)
2027 2033
2028 2034 def storageinfo(
2029 2035 self,
2030 2036 exclusivefiles=False,
2031 2037 sharedfiles=False,
2032 2038 revisionscount=False,
2033 2039 trackedsize=False,
2034 2040 storedsize=False,
2035 2041 ):
2036 2042 return self._revlog.storageinfo(
2037 2043 exclusivefiles=exclusivefiles,
2038 2044 sharedfiles=sharedfiles,
2039 2045 revisionscount=revisionscount,
2040 2046 trackedsize=trackedsize,
2041 2047 storedsize=storedsize,
2042 2048 )
2043 2049
2044 2050 @property
2045 2051 def opener(self):
2046 2052 return self._revlog.opener
2047 2053
2048 2054 @opener.setter
2049 2055 def opener(self, value):
2050 2056 self._revlog.opener = value
2051 2057
2052 2058
2053 2059 manifestrevlog = interfaceutil.implementer(repository.imanifeststorage)(
2054 2060 ManifestRevlog
2055 2061 )
2056 2062
2057 2063 if typing.TYPE_CHECKING:
2058 2064 manifestrevlog = ManifestRevlog
2059 2065
2060 2066 AnyManifestCtx = Union['ManifestCtx', 'TreeManifestCtx']
2061 2067 AnyManifestDict = Union[ManifestDict, TreeManifest]
2062 2068
2063 2069
2064 2070 class ManifestLog:
2065 2071 """A collection class representing the collection of manifest snapshots
2066 2072 referenced by commits in the repository.
2067 2073
2068 2074 In this situation, 'manifest' refers to the abstract concept of a snapshot
2069 2075 of the list of files in the given commit. Consumers of the output of this
2070 2076 class do not care about the implementation details of the actual manifests
2071 2077 they receive (i.e. tree or flat or lazily loaded, etc)."""
2072 2078
2073 2079 def __init__(self, opener, repo, rootstore, narrowmatch):
2074 2080 self.nodeconstants = repo.nodeconstants
2075 2081 usetreemanifest = False
2076 2082 cachesize = 4
2077 2083
2078 2084 opts = getattr(opener, 'options', None)
2079 2085 if opts is not None:
2080 2086 usetreemanifest = opts.get(b'treemanifest', usetreemanifest)
2081 2087 cachesize = opts.get(b'manifestcachesize', cachesize)
2082 2088
2083 2089 self._treemanifests = usetreemanifest
2084 2090
2085 2091 self._rootstore = rootstore
2086 2092 self._rootstore._setupmanifestcachehooks(repo)
2087 2093 self._narrowmatch = narrowmatch
2088 2094
2089 2095 # A cache of the manifestctx or treemanifestctx for each directory
2090 2096 self._dirmancache = {}
2091 2097 self._dirmancache[b''] = util.lrucachedict(cachesize)
2092 2098
2093 2099 self._cachesize = cachesize
2094 2100
2095 2101 def __getitem__(self, node):
2096 2102 """Retrieves the manifest instance for the given node. Throws a
2097 2103 LookupError if not found.
2098 2104 """
2099 2105 return self.get(b'', node)
2100 2106
2101 2107 @property
2102 2108 def narrowed(self):
2103 2109 return not (self._narrowmatch is None or self._narrowmatch.always())
2104 2110
2105 2111 def get(
2106 2112 self, tree: bytes, node: bytes, verify: bool = True
2107 2113 ) -> AnyManifestCtx:
2108 2114 """Retrieves the manifest instance for the given node. Throws a
2109 2115 LookupError if not found.
2110 2116
2111 2117 `verify` - if True an exception will be thrown if the node is not in
2112 2118 the revlog
2113 2119 """
2114 2120 if node in self._dirmancache.get(tree, ()):
2115 2121 return self._dirmancache[tree][node]
2116 2122
2117 2123 if not self._narrowmatch.always():
2118 2124 if not self._narrowmatch.visitdir(tree[:-1]):
2119 2125 return excludeddirmanifestctx(self.nodeconstants, tree, node)
2120 2126 if tree:
2121 2127 if self._rootstore._treeondisk:
2122 2128 if verify:
2123 2129 # Side-effect is LookupError is raised if node doesn't
2124 2130 # exist.
2125 2131 self.getstorage(tree).rev(node)
2126 2132
2127 2133 m = treemanifestctx(self, tree, node)
2128 2134 else:
2129 2135 raise error.Abort(
2130 2136 _(
2131 2137 b"cannot ask for manifest directory '%s' in a flat "
2132 2138 b"manifest"
2133 2139 )
2134 2140 % tree
2135 2141 )
2136 2142 else:
2137 2143 if verify:
2138 2144 # Side-effect is LookupError is raised if node doesn't exist.
2139 2145 self._rootstore.rev(node)
2140 2146
2141 2147 if self._treemanifests:
2142 2148 m = treemanifestctx(self, b'', node)
2143 2149 else:
2144 2150 m = manifestctx(self, node)
2145 2151
2146 2152 if node != self.nodeconstants.nullid:
2147 2153 mancache = self._dirmancache.get(tree)
2148 2154 if not mancache:
2149 2155 mancache = util.lrucachedict(self._cachesize)
2150 2156 self._dirmancache[tree] = mancache
2151 2157 mancache[node] = m
2152 2158 return m
2153 2159
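# A minimal illustrative sketch (not part of the original file) of typical use;
# `repo`, `node` and `subnode` below are assumptions:
#
#   mfl = repo.manifestlog
#   ctx = mfl[node]                  # manifestctx or treemanifestctx
#   files = ctx.read()               # manifestdict / treemanifest of that revision
#   sub = mfl.get(b'dir/', subnode)  # per-directory ctx when tree manifests are on disk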
2154 2160 def getstorage(self, tree):
2155 2161 return self._rootstore.dirlog(tree)
2156 2162
2157 2163 def clearcaches(self, clear_persisted_data: bool = False) -> None:
2158 2164 self._dirmancache.clear()
2159 2165 self._rootstore.clearcaches(clear_persisted_data=clear_persisted_data)
2160 2166
2161 2167 def rev(self, node) -> int:
2162 2168 return self._rootstore.rev(node)
2163 2169
2164 2170 def update_caches(self, transaction) -> None:
2165 2171 return self._rootstore._revlog.update_caches(transaction=transaction)
2166 2172
2167 2173
2168 2174 manifestlog = interfaceutil.implementer(repository.imanifestlog)(ManifestLog)
2169 2175
2170 2176 if typing.TYPE_CHECKING:
2171 2177 manifestlog = ManifestLog
2172 2178
2173 2179
2174 2180 class MemManifestCtx:
2181 _manifestdict: ManifestDict
2182
2175 2183 def __init__(self, manifestlog):
2176 2184 self._manifestlog = manifestlog
2177 2185 self._manifestdict = manifestdict(manifestlog.nodeconstants.nodelen)
2178 2186
2179 2187 def _storage(self) -> ManifestRevlog:
2180 2188 return self._manifestlog.getstorage(b'')
2181 2189
2182 2190 def copy(self) -> 'MemManifestCtx':
2183 2191 memmf = memmanifestctx(self._manifestlog)
2184 2192 memmf._manifestdict = self.read().copy()
2185 2193 return memmf
2186 2194
2187 2195 def read(self) -> 'ManifestDict':
2188 2196 return self._manifestdict
2189 2197
2190 2198 def write(self, transaction, link, p1, p2, added, removed, match=None):
2191 2199 return self._storage().add(
2192 2200 self._manifestdict,
2193 2201 transaction,
2194 2202 link,
2195 2203 p1,
2196 2204 p2,
2197 2205 added,
2198 2206 removed,
2199 2207 match=match,
2200 2208 )
2201 2209
2202 2210
2203 2211 memmanifestctx = interfaceutil.implementer(
2204 2212 repository.imanifestrevisionwritable
2205 2213 )(MemManifestCtx)
2206 2214
2207 2215 if typing.TYPE_CHECKING:
2208 2216 memmanifestctx = MemManifestCtx
2209 2217
2210 2218
2211 2219 class ManifestCtx:
2212 2220 """A class representing a single revision of a manifest, including its
2213 2221 contents, its parent revs, and its linkrev.
2214 2222 """
2215 2223
2224 _data: Optional[ManifestDict]
2225
2216 2226 def __init__(self, manifestlog, node):
2217 2227 self._manifestlog = manifestlog
2218 2228 self._data = None
2219 2229
2220 2230 self._node = node
2221 2231
2222 2232 # TODO: We eventually want p1, p2, and linkrev exposed on this class,
2223 2233 # but let's add it later when something needs it and we can load it
2224 2234 # lazily.
2225 2235 # self.p1, self.p2 = store.parents(node)
2226 2236 # rev = store.rev(node)
2227 2237 # self.linkrev = store.linkrev(rev)
2228 2238
2229 2239 def _storage(self) -> 'ManifestRevlog':
2230 2240 return self._manifestlog.getstorage(b'')
2231 2241
2232 2242 def node(self) -> bytes:
2233 2243 return self._node
2234 2244
2235 2245 def copy(self) -> MemManifestCtx:
2236 2246 memmf = memmanifestctx(self._manifestlog)
2237 2247 memmf._manifestdict = self.read().copy()
2238 2248 return memmf
2239 2249
2240 2250 @propertycache
2241 2251 def parents(self) -> Tuple[bytes, bytes]:
2242 2252 return self._storage().parents(self._node)
2243 2253
2244 2254 def read(self) -> 'ManifestDict':
2245 2255 if self._data is None:
2246 2256 nc = self._manifestlog.nodeconstants
2247 2257 if self._node == nc.nullid:
2248 2258 self._data = manifestdict(nc.nodelen)
2249 2259 else:
2250 2260 store = self._storage()
2251 2261 if self._node in store.fulltextcache:
2252 2262 text = pycompat.bytestr(store.fulltextcache[self._node])
2253 2263 else:
2254 2264 text = store.revision(self._node)
2255 2265 arraytext = bytearray(text)
2256 2266 store.fulltextcache[self._node] = arraytext
2257 2267 self._data = manifestdict(nc.nodelen, text)
2258 2268 return self._data
2259 2269
2260 2270 def readfast(self, shallow: bool = False) -> 'ManifestDict':
2261 2271 """Calls either readdelta or read, based on which would be less work.
2262 2272 readdelta is called if the delta is against the p1, and therefore can be
2263 2273 read quickly.
2264 2274
2265 2275 If `shallow` is True, nothing changes since this is a flat manifest.
2266 2276 """
2267 2277 util.nouideprecwarn(
2268 2278 b'"readfast" is deprecated use "read_any_fast_delta" or "read_delta_parents"',
2269 2279 b"6.9",
2270 2280 stacklevel=2,
2271 2281 )
2272 2282 store = self._storage()
2273 2283 r = store.rev(self._node)
2274 2284 deltaparent = store.deltaparent(r)
2275 2285 if deltaparent != nullrev and deltaparent in store.parentrevs(r):
2276 2286 return self.readdelta()
2277 2287 return self.read()
2278 2288
2279 2289 def readdelta(self, shallow: bool = False) -> 'ManifestDict':
2280 2290 """Returns a manifest containing just the entries that are present
2281 2291 in this manifest, but not in its p1 manifest. This is efficient to read
2282 2292 if the revlog delta is already p1.
2283 2293
2284 2294 Changing the value of `shallow` has no effect on flat manifests.
2285 2295 """
2286 2296 util.nouideprecwarn(
2287 2297 b'"readfast" is deprecated use "read_any_fast_delta" or "read_delta_new_entries"',
2288 2298 b"6.9",
2289 2299 stacklevel=2,
2290 2300 )
2291 2301 store = self._storage()
2292 2302 r = store.rev(self._node)
2293 2303 d = mdiff.patchtext(store.revdiff(store.deltaparent(r), r))
2294 2304 return manifestdict(store.nodeconstants.nodelen, d)
2295 2305
2296 2306 def read_any_fast_delta(
2297 2307 self,
2298 2308 valid_bases: Optional[Collection[int]] = None,
2299 2309 *,
2300 2310 shallow: bool = False,
2301 2311 ) -> Tuple[Optional[int], ManifestDict]:
2302 2312 """see `imanifestrevisionstored` documentation"""
2303 2313 store = self._storage()
2304 2314 r = store.rev(self._node)
2305 2315 deltaparent = store.deltaparent(r)
2306 2316 if valid_bases is None:
2307 2317 # make sure the next check is True
2308 2318 valid_bases = (deltaparent,)
2309 2319 if deltaparent != nullrev and deltaparent in valid_bases:
2310 2320 d = mdiff.patchtext(store.revdiff(deltaparent, r))
2311 2321 return (
2312 2322 deltaparent,
2313 2323 manifestdict(store.nodeconstants.nodelen, d),
2314 2324 )
2315 2325 return (None, self.read())
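# Illustrative usage sketch (not part of the change): read_any_fast_delta()
# returns a (base_rev_or_None, manifestdict) pair, so a hypothetical caller
# might do:
#
#   base, m = mctx.read_any_fast_delta()
#   if base is None:
#       ...  # `m` is a full manifest
#   else:
#       ...  # `m` only describes entries changed relative to rev `base`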
2316 2326
2317 2327 def read_delta_parents(
2318 2328 self,
2319 2329 *,
2320 2330 shallow: bool = False,
2321 2331 exact: bool = True,
2322 2332 ) -> ManifestDict:
2323 2333 """see `interface.imanifestrevisionbase` documentations"""
2324 2334 store = self._storage()
2325 2335 r = store.rev(self._node)
2326 2336 deltaparent = store.deltaparent(r)
2327 2337 parents = [p for p in store.parentrevs(r) if p is not nullrev]
2328 2338 if not exact and deltaparent in parents:
2329 2339 d = mdiff.patchtext(store.revdiff(store.deltaparent(r), r))
2330 2340 return manifestdict(store.nodeconstants.nodelen, d)
2331 2341 elif not exact or len(parents) == 0:
2332 2342 return self.read()
2333 2343 elif len(parents) == 1:
2334 2344 p = parents[0]
2335 2345 d = mdiff.patchtext(store.revdiff(p, r))
2336 2346 return manifestdict(store.nodeconstants.nodelen, d)
2337 2347 else:
2338 2348 p1, p2 = parents
2339 2349 d1 = mdiff.patchtext(store.revdiff(p1, r))
2340 2350 d2 = mdiff.patchtext(store.revdiff(p2, r))
2341 2351 d1 = manifestdict(store.nodeconstants.nodelen, d1)
2342 2352 d2 = manifestdict(store.nodeconstants.nodelen, d2)
2343 2353 md = manifestdict(store.nodeconstants.nodelen)
2344 2354 for f, new_node, new_flag in d1.iterentries():
2345 2355 if f not in d2:
2346 2356 continue
2347 2357 if new_node is not None:
2348 2358 md.set(f, new_node, new_flag)
2349 2359 return md
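# Sketch of the two-parent branch above (hypothetical data, for illustration):
# an entry survives into `md` only when it changed relative to *both* parents.
# With d1 = {b'a': n1, b'b': n2} (changes vs p1) and d2 = {b'a': n1} (changes
# vs p2), only b'a' is kept; b'b' is skipped because it is unchanged
# relative to p2.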
2350 2360
2351 2361 def read_delta_new_entries(self, *, shallow=False) -> ManifestDict:
2352 2362 """see `interface.imanifestrevisionbase` documentations"""
2353 2363 # If we are using narrow, returning a delta against an arbitrary
2354 2364 # changeset might return files outside the narrowspec. This can create
2355 2365 # issues when running validation server side with strict security, as a
2356 2366 # push from a low-privilege user might be seen as adding new revisions
2357 2367 # for files they cannot touch. So we are strict if narrow is involved.
2358 2368 if self._manifestlog.narrowed:
2359 2369 return self.read_delta_parents(shallow=shallow, exact=True)
2360 2370 store = self._storage()
2361 2371 r = store.rev(self._node)
2362 2372 d = mdiff.patchtext(store.revdiff(store.deltaparent(r), r))
2363 2373 return manifestdict(store.nodeconstants.nodelen, d)
2364 2374
2365 2375 def find(self, key: bytes) -> Tuple[bytes, bytes]:
2366 2376 return self.read().find(key)
2367 2377
2368 2378
2369 2379 manifestctx = interfaceutil.implementer(repository.imanifestrevisionstored)(
2370 2380 ManifestCtx
2371 2381 )
2372 2382
2373 2383 if typing.TYPE_CHECKING:
2374 2384 manifestctx = ManifestCtx
2375 2385
2376 2386
2377 2387 class MemTreeManifestCtx:
2388 _treemanifest: TreeManifest
2389
2378 2390 def __init__(self, manifestlog, dir=b''):
2379 2391 self._manifestlog = manifestlog
2380 2392 self._dir = dir
2381 2393 self._treemanifest = treemanifest(manifestlog.nodeconstants)
2382 2394
2383 2395 def _storage(self) -> ManifestRevlog:
2384 2396 return self._manifestlog.getstorage(b'')
2385 2397
2386 2398 def copy(self) -> 'MemTreeManifestCtx':
2387 2399 memmf = memtreemanifestctx(self._manifestlog, dir=self._dir)
2388 2400 memmf._treemanifest = self._treemanifest.copy()
2389 2401 return memmf
2390 2402
2391 2403 def read(self) -> 'TreeManifest':
2392 2404 return self._treemanifest
2393 2405
2394 2406 def write(self, transaction, link, p1, p2, added, removed, match=None):
2395 2407 def readtree(dir, node):
2396 2408 return self._manifestlog.get(dir, node).read()
2397 2409
2398 2410 return self._storage().add(
2399 2411 self._treemanifest,
2400 2412 transaction,
2401 2413 link,
2402 2414 p1,
2403 2415 p2,
2404 2416 added,
2405 2417 removed,
2406 2418 readtree=readtree,
2407 2419 match=match,
2408 2420 )
2409 2421
2410 2422
2411 2423 memtreemanifestctx = interfaceutil.implementer(
2412 2424 repository.imanifestrevisionwritable
2413 2425 )(MemTreeManifestCtx)
2414 2426
2415 2427 if typing.TYPE_CHECKING:
2416 2428 memtreemanifestctx = MemTreeManifestCtx
2417 2429
2418 2430
2419 2431 class TreeManifestCtx:
2432 _data: Optional[TreeManifest]
2433
2420 2434 def __init__(self, manifestlog, dir, node):
2421 2435 self._manifestlog = manifestlog
2422 2436 self._dir = dir
2423 2437 self._data = None
2424 2438
2425 2439 self._node = node
2426 2440
2427 2441 # TODO: Load p1/p2/linkrev lazily. They need to be lazily loaded so that
2428 2442 # we can instantiate treemanifestctx objects for directories we don't
2429 2443 # have on disk.
2430 2444 # self.p1, self.p2 = store.parents(node)
2431 2445 # rev = store.rev(node)
2432 2446 # self.linkrev = store.linkrev(rev)
2433 2447
2434 2448 def _storage(self) -> ManifestRevlog:
2435 2449 narrowmatch = self._manifestlog._narrowmatch
2436 2450 if not narrowmatch.always():
2437 2451 if not narrowmatch.visitdir(self._dir[:-1]):
2438 2452 return excludedmanifestrevlog(
2439 2453 self._manifestlog.nodeconstants, self._dir
2440 2454 )
2441 2455 return self._manifestlog.getstorage(self._dir)
2442 2456
2443 2457 def read(self) -> 'TreeManifest':
2444 2458 if self._data is None:
2445 2459 store = self._storage()
2446 2460 if self._node == self._manifestlog.nodeconstants.nullid:
2447 2461 self._data = treemanifest(self._manifestlog.nodeconstants)
2448 2462 # TODO accessing non-public API
2449 2463 elif store._treeondisk:
2450 2464 m = treemanifest(self._manifestlog.nodeconstants, dir=self._dir)
2451 2465
2452 2466 def gettext():
2453 2467 return store.revision(self._node)
2454 2468
2455 2469 def readsubtree(dir, subm):
2456 2470 # Set verify to False since we need to be able to create
2457 2471 # subtrees for trees that don't exist on disk.
2458 2472 return self._manifestlog.get(dir, subm, verify=False).read()
2459 2473
2460 2474 m.read(gettext, readsubtree)
2461 2475 m.setnode(self._node)
2462 2476 self._data = m
2463 2477 else:
2464 2478 if self._node in store.fulltextcache:
2465 2479 text = pycompat.bytestr(store.fulltextcache[self._node])
2466 2480 else:
2467 2481 text = store.revision(self._node)
2468 2482 arraytext = bytearray(text)
2469 2483 store.fulltextcache[self._node] = arraytext
2470 2484 self._data = treemanifest(
2471 2485 self._manifestlog.nodeconstants, dir=self._dir, text=text
2472 2486 )
2473 2487
2474 2488 return self._data
2475 2489
2476 2490 def node(self) -> bytes:
2477 2491 return self._node
2478 2492
2479 2493 def copy(self) -> 'MemTreeManifestCtx':
2480 2494 memmf = memtreemanifestctx(self._manifestlog, dir=self._dir)
2481 2495 memmf._treemanifest = self.read().copy()
2482 2496 return memmf
2483 2497
2484 2498 @propertycache
2485 2499 def parents(self) -> Tuple[bytes, bytes]:
2486 2500 return self._storage().parents(self._node)
2487 2501
2488 2502 def readdelta(self, shallow: bool = False) -> AnyManifestDict:
2489 2503 """see `imanifestrevisionstored` documentation"""
2490 2504 util.nouideprecwarn(
2491 2505 b'"readdelta" is deprecated use "read_any_fast_delta" or "read_delta_new_entries"',
2492 2506 b"6.9",
2493 2507 stacklevel=2,
2494 2508 )
2495 2509 store = self._storage()
2496 2510 if shallow:
2497 2511 r = store.rev(self._node)
2498 2512 d = mdiff.patchtext(store.revdiff(store.deltaparent(r), r))
2499 2513 return manifestdict(store.nodeconstants.nodelen, d)
2500 2514 else:
2501 2515 # Need to perform a slow delta
2502 2516 r0 = store.deltaparent(store.rev(self._node))
2503 2517 m0 = self._manifestlog.get(self._dir, store.node(r0)).read()
2504 2518 m1 = self.read()
2505 2519 md = treemanifest(self._manifestlog.nodeconstants, dir=self._dir)
2506 2520 for f, ((n0, fl0), (n1, fl1)) in m0.diff(m1).items():
2507 2521 if n1:
2508 2522 md[f] = n1
2509 2523 if fl1:
2510 2524 md.setflag(f, fl1)
2511 2525 return md
2512 2526
2513 2527 def read_any_fast_delta(
2514 2528 self,
2515 2529 valid_bases: Optional[Collection[int]] = None,
2516 2530 *,
2517 2531 shallow: bool = False,
2518 2532 ) -> Tuple[Optional[int], AnyManifestDict]:
2519 2533 """see `imanifestrevisionstored` documentation"""
2520 2534 store = self._storage()
2521 2535 r = store.rev(self._node)
2522 2536 deltaparent = store.deltaparent(r)
2523 2537
2524 2538 if valid_bases is None:
2525 2539 # make sure the next check is True
2526 2540 valid_bases = (deltaparent,)
2527 2541 can_use_delta = deltaparent != nullrev and deltaparent in valid_bases
2528 2542
2529 2543 if shallow:
2530 2544 if can_use_delta:
2531 2545 return (deltaparent, self._read_storage_delta_shallow())
2532 2546 else:
2533 2547 d = store.revision(self._node)
2534 2548 return (None, manifestdict(store.nodeconstants.nodelen, d))
2535 2549 else:
2536 2550 # note: the use of "slow_delta" here is cargo-culted from the previous
2537 2551 # implementation. I am not sure it makes sense since the goal here is to
2538 2552 # be fast, so why are we computing a delta? On the other hand, tree
2539 2553 # manifest deltas are fairly "cheap" and allow skipping whole parts of
2540 2554 # the tree that a full read would access. So it might be a good idea.
2541 2555 #
2542 2556 # If we realize we don't need delta here, we should simply use:
2543 2557 #
2544 2558 # return (None, self.read())
2545 2559 if can_use_delta:
2546 2560 return (None, self._read_storage_slow_delta(base=deltaparent))
2547 2561 else:
2548 2562 parents = [
2549 2563 p
2550 2564 for p in store.parentrevs(r)
2551 2565 if p is not nullrev and p in valid_bases
2552 2566 ]
2553 2567 if parents:
2554 2568 best_base = max(parents)
2555 2569 else:
2556 2570 best_base = max(valid_bases)
2557 2571 return (None, self._read_storage_slow_delta(base=best_base))
2558 2572
2559 2573 def _read_storage_delta_shallow(self) -> ManifestDict:
2560 2574 store = self._storage()
2561 2575 r = store.rev(self._node)
2562 2576 d = mdiff.patchtext(store.revdiff(store.deltaparent(r), r))
2563 2577 return manifestdict(store.nodeconstants.nodelen, d)
2564 2578
2565 2579 def _read_storage_slow_delta(self, base) -> 'TreeManifest':
2566 2580 store = self._storage()
2567 2581 if base is None:
2568 2582 base = store.deltaparent(store.rev(self._node))
2569 2583 m0 = self._manifestlog.get(self._dir, store.node(base)).read()
2570 2584 m1 = self.read()
2571 2585 md = treemanifest(self._manifestlog.nodeconstants, dir=self._dir)
2572 2586 for f, ((n0, fl0), (n1, fl1)) in m0.diff(m1).items():
2573 2587 if n1:
2574 2588 md[f] = n1
2575 2589 if fl1:
2576 2590 md.setflag(f, fl1)
2577 2591 return md
2578 2592
2579 2593 def read_delta_parents(
2580 2594 self,
2581 2595 *,
2582 2596 shallow: bool = False,
2583 2597 exact: bool = True,
2584 2598 ) -> AnyManifestDict:
2585 2599 """see `interface.imanifestrevisionbase` documentations"""
2586 2600 store = self._storage()
2587 2601 r = store.rev(self._node)
2588 2602 parents = [p for p in store.parentrevs(r) if p is not nullrev]
2589 2603 if not exact:
2590 2604 return self.read_any_fast_delta(parents, shallow=shallow)[1]
2591 2605 elif len(parents) == 0:
2592 2606 if shallow:
2593 2607 d = store.revision(self._node)
2594 2608 return manifestdict(store.nodeconstants.nodelen, d)
2595 2609 else:
2596 2610 return self.read()
2597 2611 elif len(parents) == 1:
2598 2612 p = parents[0]
2599 2613 if shallow:
2600 2614 d = mdiff.patchtext(store.revdiff(p, r))
2601 2615 return manifestdict(store.nodeconstants.nodelen, d)
2602 2616 else:
2603 2617 return self._read_storage_slow_delta(base=p)
2604 2618 else:
2605 2619 p1, p2 = parents
2606 2620 if shallow:
2607 2621 d1 = mdiff.patchtext(store.revdiff(p1, r))
2608 2622 d2 = mdiff.patchtext(store.revdiff(p2, r))
2609 2623 d1 = manifestdict(store.nodeconstants.nodelen, d1)
2610 2624 d2 = manifestdict(store.nodeconstants.nodelen, d2)
2611 2625 md = manifestdict(store.nodeconstants.nodelen)
2612 2626 for f, new_node, new_flag in d1.iterentries():
2613 2627 if f not in d2:
2614 2628 continue
2615 2629 if new_node is not None:
2616 2630 md.set(f, new_node, new_flag)
2617 2631 return md
2618 2632 else:
2619 2633 m1 = self._manifestlog.get(self._dir, store.node(p1)).read()
2620 2634 m2 = self._manifestlog.get(self._dir, store.node(p2)).read()
2621 2635 mc = self.read()
2622 2636 d1 = m1.diff(mc)
2623 2637 d2 = m2.diff(mc)
2624 2638 md = treemanifest(
2625 2639 self._manifestlog.nodeconstants,
2626 2640 dir=self._dir,
2627 2641 )
2628 2642 for f, new_node, new_flag in d1.iterentries():
2629 2643 if f not in d2:
2630 2644 continue
2631 2645 if new_node is not None:
2632 2646 md.set(f, new_node, new_flag)
2633 2647 return md
2634 2648
2635 2649 def read_delta_new_entries(
2636 2650 self, *, shallow: bool = False
2637 2651 ) -> AnyManifestDict:
2638 2652 """see `interface.imanifestrevisionbase` documentations"""
2639 2653 # If we are using narrow, returning a delta against an arbitrary
2640 2654 # changeset might return files outside the narrowspec. This can create
2641 2655 # issues when running validation server side with strict security, as a
2642 2656 # push from a low-privilege user might be seen as adding new revisions
2643 2657 # for files they cannot touch. So we are strict if narrow is involved.
2644 2658 if self._manifestlog.narrowed:
2645 2659 return self.read_delta_parents(shallow=shallow, exact=True)
2646 2660 # delegate to another existing method for simplicity
2647 2661 store = self._storage()
2648 2662 r = store.rev(self._node)
2649 2663 bases = (store.deltaparent(r),)
2650 2664 return self.read_any_fast_delta(bases, shallow=shallow)[1]
2651 2665
2652 2666 def readfast(self, shallow=False) -> AnyManifestDict:
2653 2667 """Calls either readdelta or read, based on which would be less work.
2654 2668 readdelta is called if the delta is against the p1, and therefore can be
2655 2669 read quickly.
2656 2670
2657 2671 If `shallow` is True, it only returns the entries from this manifest,
2658 2672 and not any submanifests.
2659 2673 """
2660 2674 util.nouideprecwarn(
2661 2675 b'"readdelta" is deprecated use "read_any_fast_delta" or "read_delta_parents"',
2662 2676 b"6.9",
2663 2677 stacklevel=2,
2664 2678 )
2665 2679 store = self._storage()
2666 2680 r = store.rev(self._node)
2667 2681 deltaparent = store.deltaparent(r)
2668 2682 if deltaparent != nullrev and deltaparent in store.parentrevs(r):
2669 2683 return self.readdelta(shallow=shallow)
2670 2684
2671 2685 if shallow:
2672 2686 return manifestdict(
2673 2687 store.nodeconstants.nodelen, store.revision(self._node)
2674 2688 )
2675 2689 else:
2676 2690 return self.read()
2677 2691
2678 2692 def find(self, key: bytes) -> Tuple[bytes, bytes]:
2679 2693 return self.read().find(key)
2680 2694
2681 2695
2682 2696 treemanifestctx = interfaceutil.implementer(repository.imanifestrevisionstored)(
2683 2697 TreeManifestCtx
2684 2698 )
2685 2699
2686 2700 if typing.TYPE_CHECKING:
2687 2701 treemanifestctx = TreeManifestCtx
2688 2702
2689 2703
2690 2704 class excludeddir(treemanifest):
2691 2705 """Stand-in for a directory that is excluded from the repository.
2692 2706
2693 2707 With narrowing active on a repository that uses treemanifests,
2694 2708 some of the directory revlogs will be excluded from the resulting
2695 2709 clone. This is a huge storage win for clients, but means we need
2696 2710 some sort of pseudo-manifest to surface to internals so we can
2697 2711 detect a merge conflict outside the narrowspec. That's what this
2698 2712 class is: it stands in for a directory whose node is known, but
2699 2713 whose contents are unknown.
2700 2714 """
2701 2715
2716 _files: Dict[bytes, bytes]
2717 _flags: Dict[bytes, bytes]
2718
2702 2719 def __init__(self, nodeconstants, dir, node):
2703 2720 super(excludeddir, self).__init__(nodeconstants, dir)
2704 2721 self._node = node
2705 2722 # Add an empty file, which will be included by iterators and such,
2706 2723 # appearing as the directory itself (i.e. something like "dir/")
2707 2724 self._files[b''] = node
2708 2725 self._flags[b''] = b't'
2709 2726
2710 2727 # Manifests outside the narrowspec should never be modified, so avoid
2711 2728 # copying. This makes a noticeable difference when there are very many
2712 2729 # directories outside the narrowspec. Also, it makes sense for the copy to
2713 2730 # be of the same type as the original, which would not happen with the
2714 2731 # super type's copy().
2715 2732 def copy(self):
2716 2733 return self
2717 2734
2718 2735
2719 2736 class excludeddirmanifestctx(treemanifestctx):
2720 2737 """context wrapper for excludeddir - see that docstring for rationale"""
2721 2738
2722 2739 def __init__(self, nodeconstants, dir, node):
2723 2740 self.nodeconstants = nodeconstants
2724 2741 self._dir = dir
2725 2742 self._node = node
2726 2743
2727 2744 def read(self):
2728 2745 return excludeddir(self.nodeconstants, self._dir, self._node)
2729 2746
2730 2747 def readfast(self, shallow=False):
2731 2748 # special version of readfast since we don't have underlying storage
2732 2749 return self.read()
2733 2750
2734 2751 def write(self, *args):
2735 2752 raise error.ProgrammingError(
2736 2753 b'attempt to write manifest from excluded dir %s' % self._dir
2737 2754 )
2738 2755
2739 2756
2740 2757 class excludedmanifestrevlog(manifestrevlog):
2741 2758 """Stand-in for excluded treemanifest revlogs.
2742 2759
2743 2760 When narrowing is active on a treemanifest repository, we'll have
2744 2761 references to directories we can't see due to the revlog being
2745 2762 skipped. This class exists to conform to the manifestrevlog
2746 2763 interface for those directories and proactively prevent writes to
2747 2764 outside the narrowspec.
2748 2765 """
2749 2766
2750 2767 def __init__(self, nodeconstants, dir):
2751 2768 self.nodeconstants = nodeconstants
2752 2769 self._dir = dir
2753 2770
2754 2771 def __len__(self):
2755 2772 raise error.ProgrammingError(
2756 2773 b'attempt to get length of excluded dir %s' % self._dir
2757 2774 )
2758 2775
2759 2776 def rev(self, node):
2760 2777 raise error.ProgrammingError(
2761 2778 b'attempt to get rev from excluded dir %s' % self._dir
2762 2779 )
2763 2780
2764 2781 def linkrev(self, node):
2765 2782 raise error.ProgrammingError(
2766 2783 b'attempt to get linkrev from excluded dir %s' % self._dir
2767 2784 )
2768 2785
2769 2786 def node(self, rev):
2770 2787 raise error.ProgrammingError(
2771 2788 b'attempt to get node from excluded dir %s' % self._dir
2772 2789 )
2773 2790
2774 2791 def add(self, *args, **kwargs):
2775 2792 # We should never write entries in dirlogs outside the narrow clone.
2776 2793 # However, the method still gets called from writesubtree() in
2777 2794 # _addtree(), so we need to handle it. We should possibly make that
2778 2795 # avoid calling add() with a clean manifest (_dirty is always False
2779 2796 # in excludeddir instances).
2780 2797 pass
@@ -1,4123 +1,4125 b''
1 1 # revlog.py - storage back-end for mercurial
2 2 # coding: utf8
3 3 #
4 4 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8
9 9 """Storage back-end for Mercurial.
10 10
11 11 This provides efficient delta storage with O(1) retrieve and append
12 12 and O(changes) merge between branches.
13 13 """
14 14
15 15
16 16 import binascii
17 17 import collections
18 18 import contextlib
19 19 import functools
20 20 import io
21 21 import os
22 22 import struct
23 23 import typing
24 24 import weakref
25 25 import zlib
26 26
27 27 from typing import (
28 Iterable,
29 Iterator,
28 30 Optional,
29 31 Tuple,
30 32 )
31 33
32 34 # import stuff from node for others to import from revlog
33 35 from .node import (
34 36 bin,
35 37 hex,
36 38 nullrev,
37 39 sha1nodeconstants,
38 40 short,
39 41 wdirrev,
40 42 )
41 43 from .i18n import _
42 44 from .revlogutils.constants import (
43 45 ALL_KINDS,
44 46 CHANGELOGV2,
45 47 COMP_MODE_DEFAULT,
46 48 COMP_MODE_INLINE,
47 49 COMP_MODE_PLAIN,
48 50 DELTA_BASE_REUSE_NO,
49 51 DELTA_BASE_REUSE_TRY,
50 52 ENTRY_RANK,
51 53 FEATURES_BY_VERSION,
52 54 FLAG_GENERALDELTA,
53 55 FLAG_INLINE_DATA,
54 56 INDEX_HEADER,
55 57 KIND_CHANGELOG,
56 58 KIND_FILELOG,
57 59 RANK_UNKNOWN,
58 60 REVLOGV0,
59 61 REVLOGV1,
60 62 REVLOGV1_FLAGS,
61 63 REVLOGV2,
62 64 REVLOGV2_FLAGS,
63 65 REVLOG_DEFAULT_FLAGS,
64 66 REVLOG_DEFAULT_FORMAT,
65 67 REVLOG_DEFAULT_VERSION,
66 68 SUPPORTED_FLAGS,
67 69 )
68 70 from .revlogutils.flagutil import (
69 71 REVIDX_DEFAULT_FLAGS,
70 72 REVIDX_ELLIPSIS,
71 73 REVIDX_EXTSTORED,
72 74 REVIDX_FLAGS_ORDER,
73 75 REVIDX_HASCOPIESINFO,
74 76 REVIDX_ISCENSORED,
75 77 REVIDX_RAWTEXT_CHANGING_FLAGS,
76 78 )
77 79 from .thirdparty import attr
78 80
79 81 # Force pytype to use the non-vendored package
80 82 if typing.TYPE_CHECKING:
81 83 # noinspection PyPackageRequirements
82 84 import attr
83 85
84 86 from . import (
85 87 ancestor,
86 88 dagop,
87 89 error,
88 90 mdiff,
89 91 policy,
90 92 pycompat,
91 93 revlogutils,
92 94 templatefilters,
93 95 util,
94 96 vfs as vfsmod,
95 97 )
96 98 from .interfaces import (
97 99 repository,
98 100 util as interfaceutil,
99 101 )
100 102 from .revlogutils import (
101 103 deltas as deltautil,
102 104 docket as docketutil,
103 105 flagutil,
104 106 nodemap as nodemaputil,
105 107 randomaccessfile,
106 108 revlogv0,
107 109 rewrite,
108 110 sidedata as sidedatautil,
109 111 )
110 112 from .utils import (
111 113 storageutil,
112 114 stringutil,
113 115 )
114 116
115 117 # blanked usage of all the names to prevent pyflakes complaints
116 118 # We need these names available in the module for extensions.
117 119
118 120 REVLOGV0
119 121 REVLOGV1
120 122 REVLOGV2
121 123 CHANGELOGV2
122 124 FLAG_INLINE_DATA
123 125 FLAG_GENERALDELTA
124 126 REVLOG_DEFAULT_FLAGS
125 127 REVLOG_DEFAULT_FORMAT
126 128 REVLOG_DEFAULT_VERSION
127 129 REVLOGV1_FLAGS
128 130 REVLOGV2_FLAGS
129 131 REVIDX_ISCENSORED
130 132 REVIDX_ELLIPSIS
131 133 REVIDX_HASCOPIESINFO
132 134 REVIDX_EXTSTORED
133 135 REVIDX_DEFAULT_FLAGS
134 136 REVIDX_FLAGS_ORDER
135 137 REVIDX_RAWTEXT_CHANGING_FLAGS
136 138
137 139 parsers = policy.importmod('parsers')
138 140 rustancestor = policy.importrust('ancestor')
139 141 rustdagop = policy.importrust('dagop')
140 142 rustrevlog = policy.importrust('revlog')
141 143
142 144 # Aliased for performance.
143 145 _zlibdecompress = zlib.decompress
144 146
145 147 # max size of inline data embedded into a revlog
146 148 _maxinline = 131072
147 149
148 150
149 151 # Flag processors for REVIDX_ELLIPSIS.
150 152 def ellipsisreadprocessor(rl, text):
151 153 return text, False
152 154
153 155
154 156 def ellipsiswriteprocessor(rl, text):
155 157 return text, False
156 158
157 159
158 160 def ellipsisrawprocessor(rl, text):
159 161 return False
160 162
161 163
162 164 ellipsisprocessor = (
163 165 ellipsisreadprocessor,
164 166 ellipsiswriteprocessor,
165 167 ellipsisrawprocessor,
166 168 )
167 169
168 170
169 171 def _verify_revision(rl, skipflags, state, node):
170 172 """Verify the integrity of the given revlog ``node`` while providing a hook
171 173 point for extensions to influence the operation."""
172 174 if skipflags:
173 175 state[b'skipread'].add(node)
174 176 else:
175 177 # Side-effect: read content and verify hash.
176 178 rl.revision(node)
177 179
178 180
179 181 # True if a fast implementation for persistent-nodemap is available
180 182 #
181 183 # We also consider we have a "fast" implementation in "pure" python because
182 184 # people using pure don't really have performance considerations (and a
183 185 # wheelbarrow of other slowness sources)
184 186 HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or hasattr(
185 187 parsers, 'BaseIndexObject'
186 188 )
187 189
188 190
189 191 @attr.s(slots=True)
190 192 class RevLogRevisionDelta:
191 193 node = attr.ib()
192 194 p1node = attr.ib()
193 195 p2node = attr.ib()
194 196 basenode = attr.ib()
195 197 flags = attr.ib()
196 198 baserevisionsize = attr.ib()
197 199 revision = attr.ib()
198 200 delta = attr.ib()
199 201 sidedata = attr.ib()
200 202 protocol_flags = attr.ib()
201 203 linknode = attr.ib(default=None)
202 204
203 205
204 206 revlogrevisiondelta = interfaceutil.implementer(repository.irevisiondelta)(
205 207 RevLogRevisionDelta
206 208 )
207 209
208 210 if typing.TYPE_CHECKING:
209 211 revlogrevisiondelta = RevLogRevisionDelta
210 212
211 213
212 214 @attr.s(frozen=True)
213 215 class RevLogProblem:
214 216 warning = attr.ib(default=None, type=Optional[bytes])
215 217 error = attr.ib(default=None, type=Optional[bytes])
216 218 node = attr.ib(default=None, type=Optional[bytes])
217 219
218 220
219 221 revlogproblem = interfaceutil.implementer(repository.iverifyproblem)(
220 222 RevLogProblem
221 223 )
222 224
223 225 if typing.TYPE_CHECKING:
224 226 revlogproblem = RevLogProblem
225 227
226 228
227 229 def parse_index_v1(data, inline):
228 230 # call the C implementation to parse the index data
229 231 index, cache = parsers.parse_index2(data, inline)
230 232 return index, cache
231 233
232 234
233 235 def parse_index_v2(data, inline):
234 236 # call the C implementation to parse the index data
235 237 index, cache = parsers.parse_index2(data, inline, format=REVLOGV2)
236 238 return index, cache
237 239
238 240
239 241 def parse_index_cl_v2(data, inline):
240 242 # call the C implementation to parse the index data
241 243 index, cache = parsers.parse_index2(data, inline, format=CHANGELOGV2)
242 244 return index, cache
243 245
244 246
245 247 if hasattr(parsers, 'parse_index_devel_nodemap'):
246 248
247 249 def parse_index_v1_nodemap(data, inline):
248 250 index, cache = parsers.parse_index_devel_nodemap(data, inline)
249 251 return index, cache
250 252
251 253 else:
252 254 parse_index_v1_nodemap = None
253 255
254 256
255 257 def parse_index_v1_rust(data, inline, default_header):
256 258 cache = (0, data) if inline else None
257 259 return rustrevlog.Index(data, default_header), cache
258 260
259 261
260 262 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
261 263 # signed integer)
262 264 _maxentrysize = 0x7FFFFFFF
263 265
264 266 FILE_TOO_SHORT_MSG = _(
265 267 b'cannot read from revlog %s;'
266 268 b' expected %d bytes from offset %d, data size is %d'
267 269 )
268 270
269 271 hexdigits = b'0123456789abcdefABCDEF'
270 272
271 273
272 274 class _Config:
273 275 def copy(self):
274 276 return self.__class__(**self.__dict__)
275 277
276 278
277 279 @attr.s()
278 280 class FeatureConfig(_Config):
279 281 """Hold configuration values about the available revlog features"""
280 282
281 283 # the default compression engine
282 284 compression_engine = attr.ib(default=b'zlib')
283 285 # compression engines options
284 286 compression_engine_options = attr.ib(default=attr.Factory(dict))
285 287
286 288 # can we use censor on this revlog
287 289 censorable = attr.ib(default=False)
288 290 # does this revlog use the "side data" feature
289 291 has_side_data = attr.ib(default=False)
290 292 # might remove rank configuration once the computation has no impact
291 293 compute_rank = attr.ib(default=False)
292 294 # parent order is supposed to be semantically irrelevant, so we
293 295 # normally resort parents to ensure that the first parent is non-null,
294 296 # if there is a non-null parent at all.
295 297 # filelog abuses the parent order as a flag to mark some instances of
296 298 # meta-encoded files, so allow it to disable this behavior.
297 299 canonical_parent_order = attr.ib(default=False)
298 300 # can ellipsis commit be used
299 301 enable_ellipsis = attr.ib(default=False)
300 302
301 303 def copy(self):
302 304 new = super().copy()
303 305 new.compression_engine_options = self.compression_engine_options.copy()
304 306 return new
305 307
306 308
307 309 @attr.s()
308 310 class DataConfig(_Config):
309 311 """Hold configuration value about how the revlog data are read"""
310 312
311 313 # should we try to open the "pending" version of the revlog
312 314 try_pending = attr.ib(default=False)
313 315 # should we try to open the "split" version of the revlog
314 316 try_split = attr.ib(default=False)
315 317 # When True, indexfile should be opened with checkambig=True at writing,
316 318 # to avoid file stat ambiguity.
317 319 check_ambig = attr.ib(default=False)
318 320
319 321 # If true, use mmap instead of reading to deal with large index
320 322 mmap_large_index = attr.ib(default=False)
321 323 # how much data is large
322 324 mmap_index_threshold = attr.ib(default=None)
323 325 # How much data to read and cache into the raw revlog data cache.
324 326 chunk_cache_size = attr.ib(default=65536)
325 327
326 328 # The size of the uncompressed cache compared to the largest revision seen.
327 329 uncompressed_cache_factor = attr.ib(default=None)
328 330
329 331 # The number of chunk cached
330 332 uncompressed_cache_count = attr.ib(default=None)
331 333
332 334 # Allow sparse reading of the revlog data
333 335 with_sparse_read = attr.ib(default=False)
334 336 # minimal density of a sparse read chunk
335 337 sr_density_threshold = attr.ib(default=0.50)
336 338 # minimal size of data we skip when performing sparse read
337 339 sr_min_gap_size = attr.ib(default=262144)
338 340
339 341 # are delta encoded against arbitrary bases.
340 342 generaldelta = attr.ib(default=False)
341 343
342 344
343 345 @attr.s()
344 346 class DeltaConfig(_Config):
345 347 """Hold configuration value about how new delta are computed
346 348
347 349 Some attributes are duplicated from DataConfig to help keep each object
348 350 self-contained.
349 351 """
350 352
351 353 # can delta be encoded against arbitrary bases.
352 354 general_delta = attr.ib(default=False)
353 355 # Allow sparse writing of the revlog data
354 356 sparse_revlog = attr.ib(default=False)
355 357 # maximum length of a delta chain
356 358 max_chain_len = attr.ib(default=None)
357 359 # Maximum distance between delta chain base start and end
358 360 max_deltachain_span = attr.ib(default=-1)
359 361 # If `upper_bound_comp` is not None, this is the expected maximal gain from
360 362 # compression for the data content.
361 363 upper_bound_comp = attr.ib(default=None)
362 364 # Should we try a delta against both parents
363 365 delta_both_parents = attr.ib(default=True)
364 366 # Test delta base candidate group by chunk of this maximal size.
365 367 candidate_group_chunk_size = attr.ib(default=0)
366 368 # Should we display debug information about delta computation
367 369 debug_delta = attr.ib(default=False)
368 370 # trust incoming delta by default
369 371 lazy_delta = attr.ib(default=True)
370 372 # trust the base of incoming delta by default
371 373 lazy_delta_base = attr.ib(default=False)
372 374
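# Illustrative sketch (assumption, not part of the change): _Config.copy()
# rebuilds the object from its __dict__, so a copied config can be tweaked
# without touching the original, e.g.
#
#   base = DeltaConfig(general_delta=True)
#   tuned = base.copy()
#   tuned.max_chain_len = 1000   # base.max_chain_len is still None
#
# FeatureConfig.copy() additionally copies compression_engine_options so the
# dict is not shared between the two instances.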
373 375
374 376 class _InnerRevlog:
375 377 """An inner layer of the revlog object
376 378
377 379 That layer exists to be able to delegate some operations to Rust; its
378 380 boundaries are arbitrary and based on what we can delegate to Rust.
379 381 """
380 382
381 383 opener: vfsmod.vfs
382 384
383 385 def __init__(
384 386 self,
385 387 opener: vfsmod.vfs,
386 388 index,
387 389 index_file,
388 390 data_file,
389 391 sidedata_file,
390 392 inline,
391 393 data_config,
392 394 delta_config,
393 395 feature_config,
394 396 chunk_cache,
395 397 default_compression_header,
396 398 ):
397 399 self.opener = opener
398 400 self.index = index
399 401
400 402 self.index_file = index_file
401 403 self.data_file = data_file
402 404 self.sidedata_file = sidedata_file
403 405 self.inline = inline
404 406 self.data_config = data_config
405 407 self.delta_config = delta_config
406 408 self.feature_config = feature_config
407 409
408 410 # used during diverted write.
409 411 self._orig_index_file = None
410 412
411 413 self._default_compression_header = default_compression_header
412 414
413 415 # index
414 416
415 417 # 3-tuple of file handles being used for active writing.
416 418 self._writinghandles = None
417 419
418 420 self._segmentfile = randomaccessfile.randomaccessfile(
419 421 self.opener,
420 422 (self.index_file if self.inline else self.data_file),
421 423 self.data_config.chunk_cache_size,
422 424 chunk_cache,
423 425 )
424 426 self._segmentfile_sidedata = randomaccessfile.randomaccessfile(
425 427 self.opener,
426 428 self.sidedata_file,
427 429 self.data_config.chunk_cache_size,
428 430 )
429 431
430 432 # revlog header -> revlog compressor
431 433 self._decompressors = {}
432 434 # 3-tuple of (node, rev, text) for a raw revision.
433 435 self._revisioncache = None
434 436
435 437 # cache some uncompressed chunks
436 438 # rev → uncompressed_chunk
437 439 #
438 440 # the max cost is dynamically updated to be proportional to the
439 441 # size of the revisions we actually encounter.
440 442 self._uncompressed_chunk_cache = None
441 443 if self.data_config.uncompressed_cache_factor is not None:
442 444 self._uncompressed_chunk_cache = util.lrucachedict(
443 445 self.data_config.uncompressed_cache_count,
444 446 maxcost=65536, # some arbitrary initial value
445 447 )
446 448
447 449 self._delay_buffer = None
448 450
449 451 def __len__(self):
450 452 return len(self.index)
451 453
452 454 def clear_cache(self):
453 455 assert not self.is_delaying
454 456 self._revisioncache = None
455 457 if self._uncompressed_chunk_cache is not None:
456 458 self._uncompressed_chunk_cache.clear()
457 459 self._segmentfile.clear_cache()
458 460 self._segmentfile_sidedata.clear_cache()
459 461
460 462 @property
461 463 def canonical_index_file(self):
462 464 if self._orig_index_file is not None:
463 465 return self._orig_index_file
464 466 return self.index_file
465 467
466 468 @property
467 469 def is_delaying(self):
468 470 """is the revlog is currently delaying the visibility of written data?
469 471
470 472 The delaying mechanism can be either in-memory or written on disk in a
471 473 side-file."""
472 474 return (self._delay_buffer is not None) or (
473 475 self._orig_index_file is not None
474 476 )
475 477
476 478 # Derived from index values.
477 479
478 480 def start(self, rev):
479 481 """the offset of the data chunk for this revision"""
480 482 return int(self.index[rev][0] >> 16)
481 483
482 484 def length(self, rev):
483 485 """the length of the data chunk for this revision"""
484 486 return self.index[rev][1]
485 487
486 488 def end(self, rev):
487 489 """the end of the data chunk for this revision"""
488 490 return self.start(rev) + self.length(rev)
489 491
490 492 def deltaparent(self, rev):
491 493 """return deltaparent of the given revision"""
492 494 base = self.index[rev][3]
493 495 if base == rev:
494 496 return nullrev
495 497 elif self.delta_config.general_delta:
496 498 return base
497 499 else:
498 500 return rev - 1
499 501
500 502 def issnapshot(self, rev):
501 503 """tells whether rev is a snapshot"""
502 504 if not self.delta_config.sparse_revlog:
503 505 return self.deltaparent(rev) == nullrev
504 506 elif hasattr(self.index, 'issnapshot'):
505 507 # directly assign the method to cache the testing and access
506 508 self.issnapshot = self.index.issnapshot
507 509 return self.issnapshot(rev)
508 510 if rev == nullrev:
509 511 return True
510 512 entry = self.index[rev]
511 513 base = entry[3]
512 514 if base == rev:
513 515 return True
514 516 if base == nullrev:
515 517 return True
516 518 p1 = entry[5]
517 519 while self.length(p1) == 0:
518 520 b = self.deltaparent(p1)
519 521 if b == p1:
520 522 break
521 523 p1 = b
522 524 p2 = entry[6]
523 525 while self.length(p2) == 0:
524 526 b = self.deltaparent(p2)
525 527 if b == p2:
526 528 break
527 529 p2 = b
528 530 if base == p1 or base == p2:
529 531 return False
530 532 return self.issnapshot(base)
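# Clarifying note (illustration, not part of the change): with sparse-revlog
# enabled, a revision counts as a snapshot when its delta base is not one of
# its (non-empty) parents -- i.e. it is a full text (base == rev or nullrev)
# or an intermediate snapshot whose base is itself a snapshot; a plain delta
# against p1 or p2 is never a snapshot.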
531 533
532 534 def _deltachain(self, rev, stoprev=None):
533 535 """Obtain the delta chain for a revision.
534 536
535 537 ``stoprev`` specifies a revision to stop at. If not specified, we
536 538 stop at the base of the chain.
537 539
538 540 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
539 541 revs in ascending order and ``stopped`` is a bool indicating whether
540 542 ``stoprev`` was hit.
541 543 """
542 544 generaldelta = self.delta_config.general_delta
543 545 # Try C implementation.
544 546 try:
545 547 return self.index.deltachain(rev, stoprev, generaldelta)
546 548 except AttributeError:
547 549 pass
548 550
549 551 chain = []
550 552
551 553 # Alias to prevent attribute lookup in tight loop.
552 554 index = self.index
553 555
554 556 iterrev = rev
555 557 e = index[iterrev]
556 558 while iterrev != e[3] and iterrev != stoprev:
557 559 chain.append(iterrev)
558 560 if generaldelta:
559 561 iterrev = e[3]
560 562 else:
561 563 iterrev -= 1
562 564 e = index[iterrev]
563 565
564 566 if iterrev == stoprev:
565 567 stopped = True
566 568 else:
567 569 chain.append(iterrev)
568 570 stopped = False
569 571
570 572 chain.reverse()
571 573 return chain, stopped
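# Worked example (hypothetical revs, for illustration): with general delta and
# delta bases 0 <- 1 <- 3, _deltachain(3) walks 3 -> 1 -> 0 and returns
# ([0, 1, 3], False); _deltachain(3, stoprev=1) stops early and returns
# ([3], True), telling the caller the base text must come from rev 1.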
572 574
573 575 @util.propertycache
574 576 def _compressor(self):
575 577 engine = util.compengines[self.feature_config.compression_engine]
576 578 return engine.revlogcompressor(
577 579 self.feature_config.compression_engine_options
578 580 )
579 581
580 582 @util.propertycache
581 583 def _decompressor(self):
582 584 """the default decompressor"""
583 585 if self._default_compression_header is None:
584 586 return None
585 587 t = self._default_compression_header
586 588 c = self._get_decompressor(t)
587 589 return c.decompress
588 590
589 591 def _get_decompressor(self, t: bytes):
590 592 try:
591 593 compressor = self._decompressors[t]
592 594 except KeyError:
593 595 try:
594 596 engine = util.compengines.forrevlogheader(t)
595 597 compressor = engine.revlogcompressor(
596 598 self.feature_config.compression_engine_options
597 599 )
598 600 self._decompressors[t] = compressor
599 601 except KeyError:
600 602 raise error.RevlogError(
601 603 _(b'unknown compression type %s') % binascii.hexlify(t)
602 604 )
603 605 return compressor
604 606
605 607 def compress(self, data: bytes) -> Tuple[bytes, bytes]:
606 608 """Generate a possibly-compressed representation of data."""
607 609 if not data:
608 610 return b'', data
609 611
610 612 compressed = self._compressor.compress(data)
611 613
612 614 if compressed:
613 615 # The revlog compressor added the header in the returned data.
614 616 return b'', compressed
615 617
616 618 if data[0:1] == b'\0':
617 619 return b'', data
618 620 return b'u', data
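# Example of the (header, data) convention above (illustrative): data that
# compresses well comes back as (b'', <compressed bytes>) with the engine
# header embedded in the payload; incompressible data is returned as
# (b'u', data); and data already starting with b'\0' is returned as
# (b'', data), since the NUL byte is itself a valid "plain" marker for
# decompress().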
619 621
620 622 def decompress(self, data: bytes):
621 623 """Decompress a revlog chunk.
622 624
623 625 The chunk is expected to begin with a header identifying the
624 626 format type so it can be routed to an appropriate decompressor.
625 627 """
626 628 if not data:
627 629 return data
628 630
629 631 # Revlogs are read much more frequently than they are written and many
630 632 # chunks only take microseconds to decompress, so performance is
631 633 # important here.
632 634 #
633 635 # We can make a few assumptions about revlogs:
634 636 #
635 637 # 1) the majority of chunks will be compressed (as opposed to inline
636 638 # raw data).
637 639 # 2) decompressing *any* data will likely be at least 10x slower than
638 640 # returning raw inline data.
639 641 # 3) we want to prioritize common and officially supported compression
640 642 # engines
641 643 #
642 644 # It follows that we want to optimize for the "decompress compressed data
643 645 # when encoded with common and officially supported compression engines"
644 646 # case over "raw data" and "data encoded by less common or non-official
645 647 # compression engines." That is why we have the inline lookup first
646 648 # followed by the compengines lookup.
647 649 #
648 650 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
649 651 # compressed chunks. And this matters for changelog and manifest reads.
650 652 t = data[0:1]
651 653
652 654 if t == b'x':
653 655 try:
654 656 return _zlibdecompress(data)
655 657 except zlib.error as e:
656 658 raise error.RevlogError(
657 659 _(b'revlog decompress error: %s')
658 660 % stringutil.forcebytestr(e)
659 661 )
660 662 # '\0' is more common than 'u' so it goes first.
661 663 elif t == b'\0':
662 664 return data
663 665 elif t == b'u':
664 666 return util.buffer(data, 1)
665 667
666 668 compressor = self._get_decompressor(t)
667 669
668 670 return compressor.decompress(data)
669 671
670 672 @contextlib.contextmanager
671 673 def reading(self):
672 674 """Context manager that keeps data and sidedata files open for reading"""
673 675 if len(self.index) == 0:
674 676 yield # nothing to be read
675 677 elif self._delay_buffer is not None and self.inline:
676 678 msg = "revlog with delayed write should not be inline"
677 679 raise error.ProgrammingError(msg)
678 680 else:
679 681 with self._segmentfile.reading():
680 682 with self._segmentfile_sidedata.reading():
681 683 yield
682 684
683 685 @property
684 686 def is_writing(self):
685 687 """True is a writing context is open"""
686 688 return self._writinghandles is not None
687 689
688 690 @property
689 691 def is_open(self):
690 692 """True if any file handle is being held
691 693
692 694 Used for assert and debug in the python code"""
693 695 return self._segmentfile.is_open or self._segmentfile_sidedata.is_open
694 696
695 697 @contextlib.contextmanager
696 698 def writing(self, transaction, data_end=None, sidedata_end=None):
697 699 """Open the revlog files for writing
698 700
699 701 Add content to a revlog should be done within such context.
700 702 """
701 703 if self.is_writing:
702 704 yield
703 705 else:
704 706 ifh = dfh = sdfh = None
705 707 try:
706 708 r = len(self.index)
707 709 # opening the data file.
708 710 dsize = 0
709 711 if r:
710 712 dsize = self.end(r - 1)
711 713 dfh = None
712 714 if not self.inline:
713 715 try:
714 716 dfh = self.opener(self.data_file, mode=b"r+")
715 717 if data_end is None:
716 718 dfh.seek(0, os.SEEK_END)
717 719 else:
718 720 dfh.seek(data_end, os.SEEK_SET)
719 721 except FileNotFoundError:
720 722 dfh = self.opener(self.data_file, mode=b"w+")
721 723 transaction.add(self.data_file, dsize)
722 724 if self.sidedata_file is not None:
723 725 assert sidedata_end is not None
724 726 # revlog-v2 does not inline, help Pytype
725 727 assert dfh is not None
726 728 try:
727 729 sdfh = self.opener(self.sidedata_file, mode=b"r+")
728 730 dfh.seek(sidedata_end, os.SEEK_SET)
729 731 except FileNotFoundError:
730 732 sdfh = self.opener(self.sidedata_file, mode=b"w+")
731 733 transaction.add(self.sidedata_file, sidedata_end)
732 734
733 735 # opening the index file.
734 736 isize = r * self.index.entry_size
735 737 ifh = self.__index_write_fp()
736 738 if self.inline:
737 739 transaction.add(self.index_file, dsize + isize)
738 740 else:
739 741 transaction.add(self.index_file, isize)
740 742 # exposing all file handles for writing.
741 743 self._writinghandles = (ifh, dfh, sdfh)
742 744 self._segmentfile.writing_handle = ifh if self.inline else dfh
743 745 self._segmentfile_sidedata.writing_handle = sdfh
744 746 yield
745 747 finally:
746 748 self._writinghandles = None
747 749 self._segmentfile.writing_handle = None
748 750 self._segmentfile_sidedata.writing_handle = None
749 751 if dfh is not None:
750 752 dfh.close()
751 753 if sdfh is not None:
752 754 sdfh.close()
753 755 # closing the index file last to avoid exposing references to
754 756 # potentially unflushed data content.
755 757 if ifh is not None:
756 758 ifh.close()
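# Minimal usage sketch (assumption, illustration only): additions are meant to
# happen inside this context so that all handles are open and the files are
# registered with the transaction, e.g.
#
#   with inner.writing(tr):
#       inner.write_entry(tr, entry, data, link, offset, ...)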
757 759
758 760 def __index_write_fp(self, index_end=None):
759 761 """internal method to open the index file for writing
760 762
761 763 You should not use this directly; use `_writing` instead
762 764 """
763 765 try:
764 766 if self._delay_buffer is None:
765 767 f = self.opener(
766 768 self.index_file,
767 769 mode=b"r+",
768 770 checkambig=self.data_config.check_ambig,
769 771 )
770 772 else:
771 773 # check_ambig affects the way we open the file for writing; however,
772 774 # here we do not actually open a file for writing, as writes
773 775 # will be appended to a delay_buffer. So check_ambig is not
774 776 # meaningful and is unneeded here.
775 777 f = randomaccessfile.appender(
776 778 self.opener, self.index_file, b"r+", self._delay_buffer
777 779 )
778 780 if index_end is None:
779 781 f.seek(0, os.SEEK_END)
780 782 else:
781 783 f.seek(index_end, os.SEEK_SET)
782 784 return f
783 785 except FileNotFoundError:
784 786 if self._delay_buffer is None:
785 787 return self.opener(
786 788 self.index_file,
787 789 mode=b"w+",
788 790 checkambig=self.data_config.check_ambig,
789 791 )
790 792 else:
791 793 return randomaccessfile.appender(
792 794 self.opener, self.index_file, b"w+", self._delay_buffer
793 795 )
794 796
795 797 def __index_new_fp(self):
796 798 """internal method to create a new index file for writing
797 799
798 800 You should not use this unless you are upgrading from an inline revlog
799 801 """
800 802 return self.opener(
801 803 self.index_file,
802 804 mode=b"w",
803 805 checkambig=self.data_config.check_ambig,
804 806 )
805 807
806 808 def split_inline(self, tr, header, new_index_file_path=None):
807 809 """split the data of an inline revlog into an index and a data file"""
808 810 assert self._delay_buffer is None
809 811 existing_handles = False
810 812 if self._writinghandles is not None:
811 813 existing_handles = True
812 814 fp = self._writinghandles[0]
813 815 fp.flush()
814 816 fp.close()
815 817 # We can't use the cached file handle after close(). So prevent
816 818 # its usage.
817 819 self._writinghandles = None
818 820 self._segmentfile.writing_handle = None
819 821 # No need to deal with the sidedata writing handle, as it is only
820 822 # relevant for revlog-v2, which is never inline and thus never reaches
821 823 # this code
822 824
823 825 new_dfh = self.opener(self.data_file, mode=b"w+")
824 826 new_dfh.truncate(0) # drop any potentially existing data
825 827 try:
826 828 with self.reading():
827 829 for r in range(len(self.index)):
828 830 new_dfh.write(self.get_segment_for_revs(r, r)[1])
829 831 new_dfh.flush()
830 832
831 833 if new_index_file_path is not None:
832 834 self.index_file = new_index_file_path
833 835 with self.__index_new_fp() as fp:
834 836 self.inline = False
835 837 for i in range(len(self.index)):
836 838 e = self.index.entry_binary(i)
837 839 if i == 0:
838 840 packed_header = self.index.pack_header(header)
839 841 e = packed_header + e
840 842 fp.write(e)
841 843
842 844 # If we don't use side-write, the temp file replaces the real
843 845 # index when we exit the context manager
844 846
845 847 self._segmentfile = randomaccessfile.randomaccessfile(
846 848 self.opener,
847 849 self.data_file,
848 850 self.data_config.chunk_cache_size,
849 851 )
850 852
851 853 if existing_handles:
852 854 # switched from inline to conventional reopen the index
853 855 ifh = self.__index_write_fp()
854 856 self._writinghandles = (ifh, new_dfh, None)
855 857 self._segmentfile.writing_handle = new_dfh
856 858 new_dfh = None
857 859 # No need to deal with the sidedata writing handle, as it is only
858 860 # relevant for revlog-v2, which is never inline and thus never reaches
859 861 # this code
860 862 finally:
861 863 if new_dfh is not None:
862 864 new_dfh.close()
863 865 return self.index_file
864 866
865 867 def get_segment_for_revs(self, startrev, endrev):
866 868 """Obtain a segment of raw data corresponding to a range of revisions.
867 869
868 870 Accepts the start and end revisions and an optional already-open
869 871 file handle to be used for reading. If the file handle is read, its
870 872 seek position will not be preserved.
871 873
872 874 Requests for data may be satisfied by a cache.
873 875
874 876 Returns a 2-tuple of (offset, data) for the requested range of
875 877 revisions. Offset is the integer offset from the beginning of the
876 878 revlog and data is a str or buffer of the raw byte data.
877 879
878 880 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
879 881 to determine where each revision's data begins and ends.
880 882
881 883 API: we should consider making this a private part of the InnerRevlog
882 884 at some point.
883 885 """
884 886 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
885 887 # (functions are expensive).
886 888 index = self.index
887 889 istart = index[startrev]
888 890 start = int(istart[0] >> 16)
889 891 if startrev == endrev:
890 892 end = start + istart[1]
891 893 else:
892 894 iend = index[endrev]
893 895 end = int(iend[0] >> 16) + iend[1]
894 896
895 897 if self.inline:
896 898 start += (startrev + 1) * self.index.entry_size
897 899 end += (endrev + 1) * self.index.entry_size
898 900 length = end - start
899 901
900 902 return start, self._segmentfile.read_chunk(start, length)
901 903
902 904 def _chunk(self, rev):
903 905 """Obtain a single decompressed chunk for a revision.
904 906
905 907 Accepts an integer revision and an optional already-open file handle
906 908 to be used for reading. If used, the seek position of the file will not
907 909 be preserved.
908 910
909 911 Returns a str holding uncompressed data for the requested revision.
910 912 """
911 913 if self._uncompressed_chunk_cache is not None:
912 914 uncomp = self._uncompressed_chunk_cache.get(rev)
913 915 if uncomp is not None:
914 916 return uncomp
915 917
916 918 compression_mode = self.index[rev][10]
917 919 data = self.get_segment_for_revs(rev, rev)[1]
918 920 if compression_mode == COMP_MODE_PLAIN:
919 921 uncomp = data
920 922 elif compression_mode == COMP_MODE_DEFAULT:
921 923 uncomp = self._decompressor(data)
922 924 elif compression_mode == COMP_MODE_INLINE:
923 925 uncomp = self.decompress(data)
924 926 else:
925 927 msg = b'unknown compression mode %d'
926 928 msg %= compression_mode
927 929 raise error.RevlogError(msg)
928 930 if self._uncompressed_chunk_cache is not None:
929 931 self._uncompressed_chunk_cache.insert(rev, uncomp, cost=len(uncomp))
930 932 return uncomp
931 933
932 934 def _chunks(self, revs, targetsize=None):
933 935 """Obtain decompressed chunks for the specified revisions.
934 936
935 937 Accepts an iterable of numeric revisions that are assumed to be in
936 938 ascending order. Also accepts an optional already-open file handle
937 939 to be used for reading. If used, the seek position of the file will
938 940 not be preserved.
939 941
940 942 This function is similar to calling ``self._chunk()`` multiple times,
941 943 but is faster.
942 944
943 945 Returns a list with decompressed data for each requested revision.
944 946 """
945 947 if not revs:
946 948 return []
947 949 start = self.start
948 950 length = self.length
949 951 inline = self.inline
950 952 iosize = self.index.entry_size
951 953 buffer = util.buffer
952 954
953 955 fetched_revs = []
954 956 fadd = fetched_revs.append
955 957
956 958 chunks = []
957 959 ladd = chunks.append
958 960
959 961 if self._uncompressed_chunk_cache is None:
960 962 fetched_revs = revs
961 963 else:
962 964 for rev in revs:
963 965 cached_value = self._uncompressed_chunk_cache.get(rev)
964 966 if cached_value is None:
965 967 fadd(rev)
966 968 else:
967 969 ladd((rev, cached_value))
968 970
969 971 if not fetched_revs:
970 972 slicedchunks = ()
971 973 elif not self.data_config.with_sparse_read:
972 974 slicedchunks = (fetched_revs,)
973 975 else:
974 976 slicedchunks = deltautil.slicechunk(
975 977 self,
976 978 fetched_revs,
977 979 targetsize=targetsize,
978 980 )
979 981
980 982 for revschunk in slicedchunks:
981 983 firstrev = revschunk[0]
982 984 # Skip trailing revisions with empty diff
983 985 for lastrev in revschunk[::-1]:
984 986 if length(lastrev) != 0:
985 987 break
986 988
987 989 try:
988 990 offset, data = self.get_segment_for_revs(firstrev, lastrev)
989 991 except OverflowError:
990 992 # issue4215 - we can't cache a run of chunks greater than
991 993 # 2G on Windows
992 994 for rev in revschunk:
993 995 ladd((rev, self._chunk(rev)))
994 996
995 997 decomp = self.decompress
996 998 # self._decompressor might be None, but will not be used in that case
997 999 def_decomp = self._decompressor
998 1000 for rev in revschunk:
999 1001 chunkstart = start(rev)
1000 1002 if inline:
1001 1003 chunkstart += (rev + 1) * iosize
1002 1004 chunklength = length(rev)
1003 1005 comp_mode = self.index[rev][10]
1004 1006 c = buffer(data, chunkstart - offset, chunklength)
1005 1007 if comp_mode == COMP_MODE_PLAIN:
1006 1008 c = c
1007 1009 elif comp_mode == COMP_MODE_INLINE:
1008 1010 c = decomp(c)
1009 1011 elif comp_mode == COMP_MODE_DEFAULT:
1010 1012 c = def_decomp(c)
1011 1013 else:
1012 1014 msg = b'unknown compression mode %d'
1013 1015 msg %= comp_mode
1014 1016 raise error.RevlogError(msg)
1015 1017 ladd((rev, c))
1016 1018 if self._uncompressed_chunk_cache is not None:
1017 1019 self._uncompressed_chunk_cache.insert(rev, c, len(c))
1018 1020
1019 1021 chunks.sort()
1020 1022 return [x[1] for x in chunks]
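# Reading-path summary (illustration, not part of the change): _chunks() first
# serves revisions from the uncompressed chunk cache, groups the cache misses
# into dense slices when sparse read is enabled so one get_segment_for_revs()
# call covers many revisions, then decompresses each revision's chunk
# according to its per-entry compression mode.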
1021 1023
1022 1024 def raw_text(self, node, rev):
1023 1025 """return the possibly unvalidated rawtext for a revision
1024 1026
1025 1027 returns (rev, rawtext, validated)
1026 1028 """
1027 1029
1028 1030 # revision in the cache (could be useful to apply delta)
1029 1031 cachedrev = None
1030 1032 # An intermediate text to apply deltas to
1031 1033 basetext = None
1032 1034
1033 1035 # Check if we have the entry in cache
1034 1036 # The cache entry looks like (node, rev, rawtext)
1035 1037 if self._revisioncache:
1036 1038 cachedrev = self._revisioncache[1]
1037 1039
1038 1040 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1039 1041 if stopped:
1040 1042 basetext = self._revisioncache[2]
1041 1043
1042 1044 # drop cache to save memory, the caller is expected to
1043 1045 # update self._inner._revisioncache after validating the text
1044 1046 self._revisioncache = None
1045 1047
1046 1048 targetsize = None
1047 1049 rawsize = self.index[rev][2]
1048 1050 if 0 <= rawsize:
1049 1051 targetsize = 4 * rawsize
1050 1052
1051 1053 if self._uncompressed_chunk_cache is not None:
1052 1054 # dynamically update the uncompressed_chunk_cache size to the
1053 1055 # largest revision we saw in this revlog.
1054 1056 factor = self.data_config.uncompressed_cache_factor
1055 1057 candidate_size = rawsize * factor
1056 1058 if candidate_size > self._uncompressed_chunk_cache.maxcost:
1057 1059 self._uncompressed_chunk_cache.maxcost = candidate_size
1058 1060
1059 1061 bins = self._chunks(chain, targetsize=targetsize)
1060 1062 if basetext is None:
1061 1063 basetext = bytes(bins[0])
1062 1064 bins = bins[1:]
1063 1065
1064 1066 rawtext = mdiff.patches(basetext, bins)
1065 1067 del basetext # let us have a chance to free memory early
1066 1068 return (rev, rawtext, False)
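# Flow summary (illustration, not part of the change): raw_text() builds the
# delta chain for `rev` (stopping early at a cached revision when possible),
# fetches the chunks with _chunks(), and patches them onto the base text with
# mdiff.patches(); the returned `validated` flag is False because hash
# checking is left to the caller.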
1067 1069
1068 1070 def sidedata(self, rev, sidedata_end):
1069 1071 """Return the sidedata for a given revision number."""
1070 1072 index_entry = self.index[rev]
1071 1073 sidedata_offset = index_entry[8]
1072 1074 sidedata_size = index_entry[9]
1073 1075
1074 1076 if self.inline:
1075 1077 sidedata_offset += self.index.entry_size * (1 + rev)
1076 1078 if sidedata_size == 0:
1077 1079 return {}
1078 1080
1079 1081 if sidedata_end < sidedata_offset + sidedata_size:
1080 1082 filename = self.sidedata_file
1081 1083 end = sidedata_end
1082 1084 offset = sidedata_offset
1083 1085 length = sidedata_size
1084 1086 m = FILE_TOO_SHORT_MSG % (filename, length, offset, end)
1085 1087 raise error.RevlogError(m)
1086 1088
1087 1089 comp_segment = self._segmentfile_sidedata.read_chunk(
1088 1090 sidedata_offset, sidedata_size
1089 1091 )
1090 1092
1091 1093 comp = self.index[rev][11]
1092 1094 if comp == COMP_MODE_PLAIN:
1093 1095 segment = comp_segment
1094 1096 elif comp == COMP_MODE_DEFAULT:
1095 1097 segment = self._decompressor(comp_segment)
1096 1098 elif comp == COMP_MODE_INLINE:
1097 1099 segment = self.decompress(comp_segment)
1098 1100 else:
1099 1101 msg = b'unknown compression mode %d'
1100 1102 msg %= comp
1101 1103 raise error.RevlogError(msg)
1102 1104
1103 1105 sidedata = sidedatautil.deserialize_sidedata(segment)
1104 1106 return sidedata
1105 1107
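Both the chunk and sidedata paths above dispatch on a per-entry compression mode: plain data is returned as-is, the default mode goes through the revlog's configured decompressor, and the inline mode defers to self.decompress, which inspects the chunk's own marker byte. A schematic sketch of that dispatch, using made-up string tags and zlib instead of the real COMP_MODE_* constants and revlog compressors:

    import zlib

    def restore(chunk, mode, default_decompress=zlib.decompress):
        # Schematic version of the dispatch above; the real code switches on
        # integer COMP_MODE_* values and uses the revlog's own decompressors.
        if mode == 'plain':
            return chunk
        if mode == 'default':
            return default_decompress(chunk)
        raise ValueError('unknown compression mode %r' % (mode,))

    assert restore(b'payload', 'plain') == b'payload'
    assert restore(zlib.compress(b'payload'), 'default') == b'payload'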
1106 1108 def write_entry(
1107 1109 self,
1108 1110 transaction,
1109 1111 entry,
1110 1112 data,
1111 1113 link,
1112 1114 offset,
1113 1115 sidedata,
1114 1116 sidedata_offset,
1115 1117 index_end,
1116 1118 data_end,
1117 1119 sidedata_end,
1118 1120 ):
1119 1121 # Files opened in a+ mode have inconsistent behavior on various
1120 1122 # platforms. Windows requires that a file positioning call be made
1121 1123 # when the file handle transitions between reads and writes. See
1122 1124 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
1123 1125 # platforms, Python or the platform itself can be buggy. Some versions
1124 1126 # of Solaris have been observed to not append at the end of the file
1125 1127 # if the file was seeked to before the end. See issue4943 for more.
1126 1128 #
1127 1129 # We work around this issue by inserting a seek() before writing.
1128 1130 # Note: This is likely not necessary on Python 3. However, because
1129 1131 # the file handle is reused for reads and may be seeked there, we need
1130 1132 # to be careful before changing this.
1131 1133 if self._writinghandles is None:
1132 1134 msg = b'adding revision outside `revlog._writing` context'
1133 1135 raise error.ProgrammingError(msg)
1134 1136 ifh, dfh, sdfh = self._writinghandles
1135 1137 if index_end is None:
1136 1138 ifh.seek(0, os.SEEK_END)
1137 1139 else:
1138 1140 ifh.seek(index_end, os.SEEK_SET)
1139 1141 if dfh:
1140 1142 if data_end is None:
1141 1143 dfh.seek(0, os.SEEK_END)
1142 1144 else:
1143 1145 dfh.seek(data_end, os.SEEK_SET)
1144 1146 if sdfh:
1145 1147 sdfh.seek(sidedata_end, os.SEEK_SET)
1146 1148
1147 1149 curr = len(self.index) - 1
1148 1150 if not self.inline:
1149 1151 transaction.add(self.data_file, offset)
1150 1152 if self.sidedata_file:
1151 1153 transaction.add(self.sidedata_file, sidedata_offset)
1152 1154 transaction.add(self.canonical_index_file, curr * len(entry))
1153 1155 if data[0]:
1154 1156 dfh.write(data[0])
1155 1157 dfh.write(data[1])
1156 1158 if sidedata:
1157 1159 sdfh.write(sidedata)
1158 1160 if self._delay_buffer is None:
1159 1161 ifh.write(entry)
1160 1162 else:
1161 1163 self._delay_buffer.append(entry)
1162 1164 elif self._delay_buffer is not None:
1163 1165 msg = b'invalid delayed write on inline revlog'
1164 1166 raise error.ProgrammingError(msg)
1165 1167 else:
1166 1168 offset += curr * self.index.entry_size
1167 1169 transaction.add(self.canonical_index_file, offset)
1168 1170 assert not sidedata
1169 1171 ifh.write(entry)
1170 1172 ifh.write(data[0])
1171 1173 ifh.write(data[1])
1172 1174 return (
1173 1175 ifh.tell(),
1174 1176 dfh.tell() if dfh else None,
1175 1177 sdfh.tell() if sdfh else None,
1176 1178 )
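The seek-before-write dance described in the comment at the top of write_entry is easy to reproduce outside Mercurial. A minimal standalone sketch (the file name is hypothetical) of repositioning a read/write handle before appending:

    import os

    # After a read, some platforms will not reliably append unless the handle
    # is explicitly repositioned, hence the seek before every write.
    with open('example.dat', 'r+b') as fh:   # hypothetical pre-existing file
        fh.read(16)                          # reading moves the file position...
        fh.seek(0, os.SEEK_END)              # ...so seek to the end explicitly
        fh.write(b'new record\n')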
1177 1179
1178 1180 def _divert_index(self):
1179 1181 index_file = self.index_file
1180 1182 # when we encounter a legacy inline-changelog, split it. However it is
1181 1183 # important to use the expected filename for pending content
1182 1184 # (<radix>.a) otherwise hooks won't be seeing the content of the
1183 1185 # pending transaction.
1184 1186 if index_file.endswith(b'.s'):
1185 1187 index_file = self.index_file[:-2]
1186 1188 return index_file + b'.a'
1187 1189
1188 1190 def delay(self):
1189 1191 assert not self.is_open
1190 1192 if self.inline:
1191 1193 msg = "revlog with delayed write should not be inline"
1192 1194 raise error.ProgrammingError(msg)
1193 1195 if self._delay_buffer is not None or self._orig_index_file is not None:
1194 1196 # delay or divert already in place
1195 1197 return None
1196 1198 elif len(self.index) == 0:
1197 1199 self._orig_index_file = self.index_file
1198 1200 self.index_file = self._divert_index()
1199 1201 assert self._orig_index_file is not None
1200 1202 assert self.index_file is not None
1201 1203 if self.opener.exists(self.index_file):
1202 1204 self.opener.unlink(self.index_file)
1203 1205 return self.index_file
1204 1206 else:
1205 1207 self._delay_buffer = []
1206 1208 return None
1207 1209
1208 1210 def write_pending(self):
1209 1211 assert not self.is_open
1210 1212 if self.inline:
1211 1213 msg = "revlog with delayed write should not be inline"
1212 1214 raise error.ProgrammingError(msg)
1213 1215 if self._orig_index_file is not None:
1214 1216 return None, True
1215 1217 any_pending = False
1216 1218 pending_index_file = self._divert_index()
1217 1219 if self.opener.exists(pending_index_file):
1218 1220 self.opener.unlink(pending_index_file)
1219 1221 util.copyfile(
1220 1222 self.opener.join(self.index_file),
1221 1223 self.opener.join(pending_index_file),
1222 1224 )
1223 1225 if self._delay_buffer:
1224 1226 with self.opener(pending_index_file, b'r+') as ifh:
1225 1227 ifh.seek(0, os.SEEK_END)
1226 1228 ifh.write(b"".join(self._delay_buffer))
1227 1229 any_pending = True
1228 1230 self._delay_buffer = None
1229 1231 self._orig_index_file = self.index_file
1230 1232 self.index_file = pending_index_file
1231 1233 return self.index_file, any_pending
1232 1234
1233 1235 def finalize_pending(self):
1234 1236 assert not self.is_open
1235 1237 if self.inline:
1236 1238 msg = "revlog with delayed write should not be inline"
1237 1239 raise error.ProgrammingError(msg)
1238 1240
1239 1241 delay = self._delay_buffer is not None
1240 1242 divert = self._orig_index_file is not None
1241 1243
1242 1244 if delay and divert:
1243 1245 assert False, "unreachable"
1244 1246 elif delay:
1245 1247 if self._delay_buffer:
1246 1248 with self.opener(self.index_file, b'r+') as ifh:
1247 1249 ifh.seek(0, os.SEEK_END)
1248 1250 ifh.write(b"".join(self._delay_buffer))
1249 1251 self._delay_buffer = None
1250 1252 elif divert:
1251 1253 if self.opener.exists(self.index_file):
1252 1254 self.opener.rename(
1253 1255 self.index_file,
1254 1256 self._orig_index_file,
1255 1257 checkambig=True,
1256 1258 )
1257 1259 self.index_file = self._orig_index_file
1258 1260 self._orig_index_file = None
1259 1261 else:
1260 1262 msg = b"neither delay nor divert found on this revlog"
1261 1263 raise error.ProgrammingError(msg)
1262 1264 return self.canonical_index_file
1263 1265
1264 1266
1265 1267 class revlog:
1266 1268 """
1267 1269 the underlying revision storage object
1268 1270
1269 1271 A revlog consists of two parts, an index and the revision data.
1270 1272
1271 1273 The index is a file with a fixed record size containing
1272 1274 information on each revision, including its nodeid (hash), the
1273 1275 nodeids of its parents, the position and offset of its data within
1274 1276 the data file, and the revision it's based on. Finally, each entry
1275 1277 contains a linkrev entry that can serve as a pointer to external
1276 1278 data.
1277 1279
1278 1280 The revision data itself is a linear collection of data chunks.
1279 1281 Each chunk represents a revision and is usually represented as a
1280 1282 delta against the previous chunk. To bound lookup time, runs of
1281 1283 deltas are limited to about 2 times the length of the original
1282 1284 version data. This makes retrieval of a version proportional to
1283 1285 its size, or O(1) relative to the number of revisions.
1284 1286
1285 1287 Both pieces of the revlog are written to in an append-only
1286 1288 fashion, which means we never need to rewrite a file to insert or
1287 1289 remove data, and can use some simple techniques to avoid the need
1288 1290 for locking while reading.
1289 1291
1290 1292 If checkambig, indexfile is opened with checkambig=True at
1291 1293 writing, to avoid file stat ambiguity.
1292 1294
1293 1295 If mmaplargeindex is True, and an mmapindexthreshold is set, the
1294 1296 index will be mmapped rather than read if it is larger than the
1295 1297 configured threshold.
1296 1298
1297 1299 If censorable is True, the revlog can have censored revisions.
1298 1300
1299 1301 If `upperboundcomp` is not None, this is the expected maximal gain from
1300 1302 compression for the data content.
1301 1303
1302 1304 `concurrencychecker` is an optional function that receives 3 arguments: a
1303 1305 file handle, a filename, and an expected position. It should check whether
1304 1306 the current position in the file handle is valid, and log/warn/fail (by
1305 1307 raising).
1306 1308
1307 1309 See mercurial/revlogutils/constants.py for details about the content of an
1308 1310 index entry.
1309 1311 """
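The docstring above compresses a lot of design into a few sentences: most revisions are stored as deltas, and reading one means walking back to a base revision and replaying the chain. A toy illustration of that chain walk, with a made-up edit-list delta format that has nothing to do with revlog's real delta encoding:

    # Each entry is either a full snapshot or a delta against the previous entry.
    # A "delta" here is a list of (start, end, replacement) edits; this toy format
    # only illustrates the chain-walking idea, not revlog's real storage.
    entries = [
        ('full', b'line one\nline two\n'),
        ('delta', [(5, 8, b'ONE')]),    # rev 1: patch of rev 0
        ('delta', [(14, 17, b'TWO')]),  # rev 2: patch of rev 1
    ]

    def apply_delta(text, edits):
        for start, end, repl in sorted(edits, reverse=True):
            text = text[:start] + repl + text[end:]
        return text

    def rawtext(rev):
        # walk back to the most recent full snapshot, then replay deltas forward
        chain = []
        while entries[rev][0] == 'delta':
            chain.append(rev)
            rev -= 1
        text = entries[rev][1]
        for r in reversed(chain):
            text = apply_delta(text, entries[r][1])
        return text

    assert rawtext(2) == b'line ONE\nline TWO\n'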
1310 1312
1311 1313 _flagserrorclass = error.RevlogError
1312 1314
1313 1315 opener: vfsmod.vfs
1314 1316
1315 1317 @staticmethod
1316 1318 def is_inline_index(header_bytes):
1317 1319 """Determine if a revlog is inline from the initial bytes of the index"""
1318 1320 if len(header_bytes) == 0:
1319 1321 return True
1320 1322
1321 1323 header = INDEX_HEADER.unpack(header_bytes)[0]
1322 1324
1323 1325 _format_flags = header & ~0xFFFF
1324 1326 _format_version = header & 0xFFFF
1325 1327
1326 1328 features = FEATURES_BY_VERSION[_format_version]
1327 1329 return features[b'inline'](_format_flags)
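The flag/version split performed by is_inline_index is 16-bit masking of a 4-byte big-endian header. A worked example with plain struct; the packed value is illustrative, and the authoritative constants live in mercurial/revlogutils/constants.py:

    import struct

    # Hypothetical header: version 1 with one feature flag set in the high bits.
    header_bytes = struct.pack(">I", (1 << 16) | 1)

    header = struct.unpack(">I", header_bytes)[0]
    format_flags = header & ~0xFFFF    # high 16 bits: feature flags
    format_version = header & 0xFFFF   # low 16 bits: format version

    assert (hex(format_flags), format_version) == ('0x10000', 1)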
1328 1330
1329 1331 _docket_file: Optional[bytes]
1330 1332
1331 1333 def __init__(
1332 1334 self,
1333 1335 opener: vfsmod.vfs,
1334 1336 target,
1335 1337 radix,
1336 1338 postfix=None, # only exist for `tmpcensored` now
1337 1339 checkambig=False,
1338 1340 mmaplargeindex=False,
1339 1341 censorable=False,
1340 1342 upperboundcomp=None,
1341 1343 persistentnodemap=False,
1342 1344 concurrencychecker=None,
1343 1345 trypending=False,
1344 1346 try_split=False,
1345 1347 canonical_parent_order=True,
1346 1348 data_config=None,
1347 1349 delta_config=None,
1348 1350 feature_config=None,
1349 1351 may_inline=True, # may inline new revlog
1350 1352 ):
1351 1353 """
1352 1354 create a revlog object
1353 1355
1354 1356 opener is a function that abstracts the file opening operation
1355 1357 and can be used to implement COW semantics or the like.
1356 1358
1357 1359 `target`: a (KIND, ID) tuple that identifies the content stored in
1358 1360 this revlog. It helps the rest of the code to understand what the revlog
1359 1361 is about without having to resort to heuristics and index filename
1360 1362 analysis. Note that this must reliably be set by normal code, but
1361 1363 test, debug, or performance measurement code might not set this to an
1362 1364 accurate value.
1363 1365 """
1364 1366
1365 1367 self.radix = radix
1366 1368
1367 1369 self._docket_file = None
1368 1370 self._indexfile = None
1369 1371 self._datafile = None
1370 1372 self._sidedatafile = None
1371 1373 self._nodemap_file = None
1372 1374 self.postfix = postfix
1373 1375 self._trypending = trypending
1374 1376 self._try_split = try_split
1375 1377 self._may_inline = may_inline
1376 1378 self.opener = opener
1377 1379 if persistentnodemap:
1378 1380 self._nodemap_file = nodemaputil.get_nodemap_file(self)
1379 1381
1380 1382 assert target[0] in ALL_KINDS
1381 1383 assert len(target) == 2
1382 1384 self.target = target
1383 1385 if feature_config is not None:
1384 1386 self.feature_config = feature_config.copy()
1385 1387 elif b'feature-config' in self.opener.options:
1386 1388 self.feature_config = self.opener.options[b'feature-config'].copy()
1387 1389 else:
1388 1390 self.feature_config = FeatureConfig()
1389 1391 self.feature_config.censorable = censorable
1390 1392 self.feature_config.canonical_parent_order = canonical_parent_order
1391 1393 if data_config is not None:
1392 1394 self.data_config = data_config.copy()
1393 1395 elif b'data-config' in self.opener.options:
1394 1396 self.data_config = self.opener.options[b'data-config'].copy()
1395 1397 else:
1396 1398 self.data_config = DataConfig()
1397 1399 self.data_config.check_ambig = checkambig
1398 1400 self.data_config.mmap_large_index = mmaplargeindex
1399 1401 if delta_config is not None:
1400 1402 self.delta_config = delta_config.copy()
1401 1403 elif b'delta-config' in self.opener.options:
1402 1404 self.delta_config = self.opener.options[b'delta-config'].copy()
1403 1405 else:
1404 1406 self.delta_config = DeltaConfig()
1405 1407 self.delta_config.upper_bound_comp = upperboundcomp
1406 1408
1407 1409 # Maps rev to chain base rev.
1408 1410 self._chainbasecache = util.lrucachedict(100)
1409 1411
1410 1412 self.index = None
1411 1413 self._docket = None
1412 1414 self._nodemap_docket = None
1413 1415 # Mapping of partial identifiers to full nodes.
1414 1416 self._pcache = {}
1415 1417
1416 1418 # other optional features
1417 1419
1418 1420 # Make copy of flag processors so each revlog instance can support
1419 1421 # custom flags.
1420 1422 self._flagprocessors = dict(flagutil.flagprocessors)
1421 1423 # prevent nesting of addgroup
1422 1424 self._adding_group = None
1423 1425
1424 1426 chunk_cache = self._loadindex()
1425 1427 self._load_inner(chunk_cache)
1426 1428 self._concurrencychecker = concurrencychecker
1427 1429
1428 1430 def _init_opts(self):
1429 1431 """process options (from above/config) to setup associated default revlog mode
1430 1432
1431 1433 These values might be affected when actually reading on disk information.
1432 1434
1433 1435 The relevant values are returned for use in _loadindex().
1434 1436
1435 1437 * newversionflags:
1436 1438 version header to use if we need to create a new revlog
1437 1439
1438 1440 * mmapindexthreshold:
1439 1441 minimal index size for start to use mmap
1440 1442
1441 1443 * force_nodemap:
1442 1444 force the usage of a "development" version of the nodemap code
1443 1445 """
1444 1446 opts = self.opener.options
1445 1447
1446 1448 if b'changelogv2' in opts and self.revlog_kind == KIND_CHANGELOG:
1447 1449 new_header = CHANGELOGV2
1448 1450 compute_rank = opts.get(b'changelogv2.compute-rank', True)
1449 1451 self.feature_config.compute_rank = compute_rank
1450 1452 elif b'revlogv2' in opts:
1451 1453 new_header = REVLOGV2
1452 1454 elif b'revlogv1' in opts:
1453 1455 new_header = REVLOGV1
1454 1456 if self._may_inline:
1455 1457 new_header |= FLAG_INLINE_DATA
1456 1458 if b'generaldelta' in opts:
1457 1459 new_header |= FLAG_GENERALDELTA
1458 1460 elif b'revlogv0' in self.opener.options:
1459 1461 new_header = REVLOGV0
1460 1462 else:
1461 1463 new_header = REVLOG_DEFAULT_VERSION
1462 1464
1463 1465 mmapindexthreshold = None
1464 1466 if self.data_config.mmap_large_index:
1465 1467 mmapindexthreshold = self.data_config.mmap_index_threshold
1466 1468 if self.feature_config.enable_ellipsis:
1467 1469 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
1468 1470
1469 1471 # revlog v0 doesn't have flag processors
1470 1472 for flag, processor in opts.get(b'flagprocessors', {}).items():
1471 1473 flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
1472 1474
1473 1475 chunk_cache_size = self.data_config.chunk_cache_size
1474 1476 if chunk_cache_size <= 0:
1475 1477 raise error.RevlogError(
1476 1478 _(b'revlog chunk cache size %r is not greater than 0')
1477 1479 % chunk_cache_size
1478 1480 )
1479 1481 elif chunk_cache_size & (chunk_cache_size - 1):
1480 1482 raise error.RevlogError(
1481 1483 _(b'revlog chunk cache size %r is not a power of 2')
1482 1484 % chunk_cache_size
1483 1485 )
1484 1486 force_nodemap = opts.get(b'devel-force-nodemap', False)
1485 1487 return new_header, mmapindexthreshold, force_nodemap
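The chunk-cache validation above uses the standard bit trick: a positive integer is a power of two exactly when n & (n - 1) is zero, because a power of two has a single set bit. For illustration:

    def is_power_of_two(n):
        # clearing the lowest set bit leaves zero only for powers of two
        return n > 0 and n & (n - 1) == 0

    assert is_power_of_two(65536)
    assert not is_power_of_two(65535)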
1486 1488
1487 1489 def _get_data(self, filepath, mmap_threshold, size=None):
1488 1490 """return a file content with or without mmap
1489 1491
1490 1492 If the file is missing return the empty string"""
1491 1493 try:
1492 1494 with self.opener(filepath) as fp:
1493 1495 if mmap_threshold is not None:
1494 1496 file_size = self.opener.fstat(fp).st_size
1495 1497 if (
1496 1498 file_size >= mmap_threshold
1497 1499 and self.opener.is_mmap_safe(filepath)
1498 1500 ):
1499 1501 if size is not None:
1500 1502 # avoid potential mmap crash
1501 1503 size = min(file_size, size)
1502 1504 # TODO: should .close() to release resources without
1503 1505 # relying on Python GC
1504 1506 if size is None:
1505 1507 return util.buffer(util.mmapread(fp))
1506 1508 else:
1507 1509 return util.buffer(util.mmapread(fp, size))
1508 1510 if size is None:
1509 1511 return fp.read()
1510 1512 else:
1511 1513 return fp.read(size)
1512 1514 except FileNotFoundError:
1513 1515 return b''
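_get_data boils down to "mmap large files, read small ones, treat missing files as empty". A stripped-down sketch of the same decision outside the vfs layer; the threshold value and helper name are made up for illustration:

    import mmap
    import os

    def read_possibly_mmapped(path, mmap_threshold=1024 * 1024):
        # map large files so pages are loaded lazily; plain read otherwise
        try:
            with open(path, 'rb') as fp:
                size = os.fstat(fp.fileno()).st_size
                if size >= mmap_threshold:
                    m = mmap.mmap(fp.fileno(), size, access=mmap.ACCESS_READ)
                    return memoryview(m)
                return fp.read()
        except FileNotFoundError:
            return b''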
1514 1516
1515 1517 def get_streams(self, max_linkrev, force_inline=False):
1516 1518 """return a list of streams that represent this revlog
1517 1519
1518 1520 This is used by stream-clone to do bytes to bytes copies of a repository.
1519 1521
1520 1522 This streams data for all revisions that refer to a changelog revision up
1521 1523 to `max_linkrev`.
1522 1524
1523 1525 If `force_inline` is set, it enforces that the stream will represent an inline revlog.
1524 1526
1525 1527 It returns a list of three-tuples:
1526 1528
1527 1529 [
1528 1530 (filename, bytes_stream, stream_size),
1529 1531 …
1530 1532 ]
1531 1533 """
1532 1534 n = len(self)
1533 1535 index = self.index
1534 1536 while n > 0:
1535 1537 linkrev = index[n - 1][4]
1536 1538 if linkrev < max_linkrev:
1537 1539 break
1538 1540 # note: this loop will rarely go through multiple iterations, since
1539 1541 # it only traverses commits created during the current streaming
1540 1542 # pull operation.
1541 1543 #
1542 1544 # If this becomes a problem, using a binary search should cap the
1543 1545 # runtime of this.
1544 1546 n = n - 1
1545 1547 if n == 0:
1546 1548 # no data to send
1547 1549 return []
1548 1550 index_size = n * index.entry_size
1549 1551 data_size = self.end(n - 1)
1550 1552
1551 1553 # XXX we might have been split (or stripped) since the object
1552 1554 # initialization. We need to close this race too, by having a way to
1553 1555 # pre-open the files we feed to the revlog and never closing them before
1554 1556 # we are done streaming.
1555 1557
1556 1558 if self._inline:
1557 1559
1558 1560 def get_stream():
1559 1561 with self.opener(self._indexfile, mode=b"r") as fp:
1560 1562 yield None
1561 1563 size = index_size + data_size
1562 1564 if size <= 65536:
1563 1565 yield fp.read(size)
1564 1566 else:
1565 1567 yield from util.filechunkiter(fp, limit=size)
1566 1568
1567 1569 inline_stream = get_stream()
1568 1570 next(inline_stream)
1569 1571 return [
1570 1572 (self._indexfile, inline_stream, index_size + data_size),
1571 1573 ]
1572 1574 elif force_inline:
1573 1575
1574 1576 def get_stream():
1575 1577 with self.reading():
1576 1578 yield None
1577 1579
1578 1580 for rev in range(n):
1579 1581 idx = self.index.entry_binary(rev)
1580 1582 if rev == 0 and self._docket is None:
1581 1583 # re-inject the inline flag
1582 1584 header = self._format_flags
1583 1585 header |= self._format_version
1584 1586 header |= FLAG_INLINE_DATA
1585 1587 header = self.index.pack_header(header)
1586 1588 idx = header + idx
1587 1589 yield idx
1588 1590 yield self._inner.get_segment_for_revs(rev, rev)[1]
1589 1591
1590 1592 inline_stream = get_stream()
1591 1593 next(inline_stream)
1592 1594 return [
1593 1595 (self._indexfile, inline_stream, index_size + data_size),
1594 1596 ]
1595 1597 else:
1596 1598
1597 1599 def get_index_stream():
1598 1600 with self.opener(self._indexfile, mode=b"r") as fp:
1599 1601 yield None
1600 1602 if index_size <= 65536:
1601 1603 yield fp.read(index_size)
1602 1604 else:
1603 1605 yield from util.filechunkiter(fp, limit=index_size)
1604 1606
1605 1607 def get_data_stream():
1606 1608 with self._datafp() as fp:
1607 1609 yield None
1608 1610 if data_size <= 65536:
1609 1611 yield fp.read(data_size)
1610 1612 else:
1611 1613 yield from util.filechunkiter(fp, limit=data_size)
1612 1614
1613 1615 index_stream = get_index_stream()
1614 1616 next(index_stream)
1615 1617 data_stream = get_data_stream()
1616 1618 next(data_stream)
1617 1619 return [
1618 1620 (self._datafile, data_stream, data_size),
1619 1621 (self._indexfile, index_stream, index_size),
1620 1622 ]
1621 1623
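The three-tuples documented in get_streams are meant to be written out verbatim by a stream-clone consumer. A hypothetical consumer sketch (dst_opener is an assumed vfs-like callable, not a real Mercurial API):

    def copy_streams(streams, dst_opener):
        # streams: list of (filename, bytes_stream, stream_size) tuples as
        # returned by get_streams(); each stream yields raw byte chunks.
        total = 0
        for name, stream, size in streams:
            with dst_opener(name, b'wb') as out:
                for chunk in stream:
                    out.write(chunk)
            total += size
        return total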
1622 1624 def _loadindex(self, docket=None):
1623 1625 new_header, mmapindexthreshold, force_nodemap = self._init_opts()
1624 1626
1625 1627 if self.postfix is not None:
1626 1628 entry_point = b'%s.i.%s' % (self.radix, self.postfix)
1627 1629 elif self._trypending and self.opener.exists(b'%s.i.a' % self.radix):
1628 1630 entry_point = b'%s.i.a' % self.radix
1629 1631 elif self._try_split and self.opener.exists(self._split_index_file):
1630 1632 entry_point = self._split_index_file
1631 1633 else:
1632 1634 entry_point = b'%s.i' % self.radix
1633 1635
1634 1636 if docket is not None:
1635 1637 self._docket = docket
1636 1638 self._docket_file = entry_point
1637 1639 else:
1638 1640 self._initempty = True
1639 1641 entry_data = self._get_data(entry_point, mmapindexthreshold)
1640 1642 if len(entry_data) > 0:
1641 1643 header = INDEX_HEADER.unpack(entry_data[:4])[0]
1642 1644 self._initempty = False
1643 1645 else:
1644 1646 header = new_header
1645 1647
1646 1648 self._format_flags = header & ~0xFFFF
1647 1649 self._format_version = header & 0xFFFF
1648 1650
1649 1651 supported_flags = SUPPORTED_FLAGS.get(self._format_version)
1650 1652 if supported_flags is None:
1651 1653 msg = _(b'unknown version (%d) in revlog %s')
1652 1654 msg %= (self._format_version, self.display_id)
1653 1655 raise error.RevlogError(msg)
1654 1656 elif self._format_flags & ~supported_flags:
1655 1657 msg = _(b'unknown flags (%#04x) in version %d revlog %s')
1656 1658 display_flag = self._format_flags >> 16
1657 1659 msg %= (display_flag, self._format_version, self.display_id)
1658 1660 raise error.RevlogError(msg)
1659 1661
1660 1662 features = FEATURES_BY_VERSION[self._format_version]
1661 1663 self._inline = features[b'inline'](self._format_flags)
1662 1664 self.delta_config.general_delta = features[b'generaldelta'](
1663 1665 self._format_flags
1664 1666 )
1665 1667 self.feature_config.has_side_data = features[b'sidedata']
1666 1668
1667 1669 if not features[b'docket']:
1668 1670 self._indexfile = entry_point
1669 1671 index_data = entry_data
1670 1672 else:
1671 1673 self._docket_file = entry_point
1672 1674 if self._initempty:
1673 1675 self._docket = docketutil.default_docket(self, header)
1674 1676 else:
1675 1677 self._docket = docketutil.parse_docket(
1676 1678 self, entry_data, use_pending=self._trypending
1677 1679 )
1678 1680
1679 1681 if self._docket is not None:
1680 1682 self._indexfile = self._docket.index_filepath()
1681 1683 index_data = b''
1682 1684 index_size = self._docket.index_end
1683 1685 if index_size > 0:
1684 1686 index_data = self._get_data(
1685 1687 self._indexfile, mmapindexthreshold, size=index_size
1686 1688 )
1687 1689 if len(index_data) < index_size:
1688 1690 msg = _(b'too few index data for %s: got %d, expected %d')
1689 1691 msg %= (self.display_id, len(index_data), index_size)
1690 1692 raise error.RevlogError(msg)
1691 1693
1692 1694 self._inline = False
1693 1695 # generaldelta implied by version 2 revlogs.
1694 1696 self.delta_config.general_delta = True
1695 1697 # the logic for persistent nodemap will be dealt with within the
1696 1698 # main docket, so disable it for now.
1697 1699 self._nodemap_file = None
1698 1700
1699 1701 if self._docket is not None:
1700 1702 self._datafile = self._docket.data_filepath()
1701 1703 self._sidedatafile = self._docket.sidedata_filepath()
1702 1704 elif self.postfix is None:
1703 1705 self._datafile = b'%s.d' % self.radix
1704 1706 else:
1705 1707 self._datafile = b'%s.d.%s' % (self.radix, self.postfix)
1706 1708
1707 1709 self.nodeconstants = sha1nodeconstants
1708 1710 self.nullid = self.nodeconstants.nullid
1709 1711
1710 1712 # sparse-revlog can't be on without general-delta (issue6056)
1711 1713 if not self.delta_config.general_delta:
1712 1714 self.delta_config.sparse_revlog = False
1713 1715
1714 1716 self._storedeltachains = True
1715 1717
1716 1718 devel_nodemap = (
1717 1719 self._nodemap_file
1718 1720 and force_nodemap
1719 1721 and parse_index_v1_nodemap is not None
1720 1722 )
1721 1723
1722 1724 use_rust_index = False
1723 1725 if rustrevlog is not None and self._nodemap_file is not None:
1724 1726 # we would like to use the rust_index in all cases, especially
1725 1727 # because it is necessary for AncestorsIterator and LazyAncestors
1726 1728 # since the 6.7 cycle.
1727 1729 #
1728 1730 # However, the performance impact of unconditionally building the
1729 1731 # nodemap is currently a problem for non-persistent nodemap
1730 1732 # repositories.
1731 1733 use_rust_index = True
1732 1734
1733 1735 self._parse_index = parse_index_v1
1734 1736 if self._format_version == REVLOGV0:
1735 1737 self._parse_index = revlogv0.parse_index_v0
1736 1738 elif self._format_version == REVLOGV2:
1737 1739 self._parse_index = parse_index_v2
1738 1740 elif self._format_version == CHANGELOGV2:
1739 1741 self._parse_index = parse_index_cl_v2
1740 1742 elif devel_nodemap:
1741 1743 self._parse_index = parse_index_v1_nodemap
1742 1744 elif use_rust_index:
1743 1745 self._parse_index = functools.partial(
1744 1746 parse_index_v1_rust, default_header=new_header
1745 1747 )
1746 1748 try:
1747 1749 d = self._parse_index(index_data, self._inline)
1748 1750 index, chunkcache = d
1749 1751 use_nodemap = (
1750 1752 not self._inline
1751 1753 and self._nodemap_file is not None
1752 1754 and hasattr(index, 'update_nodemap_data')
1753 1755 )
1754 1756 if use_nodemap:
1755 1757 nodemap_data = nodemaputil.persisted_data(self)
1756 1758 if nodemap_data is not None:
1757 1759 docket = nodemap_data[0]
1758 1760 if (
1759 1761 len(d[0]) > docket.tip_rev
1760 1762 and d[0][docket.tip_rev][7] == docket.tip_node
1761 1763 ):
1762 1764 # no changelog tampering
1763 1765 self._nodemap_docket = docket
1764 1766 index.update_nodemap_data(*nodemap_data)
1765 1767 except (ValueError, IndexError):
1766 1768 raise error.RevlogError(
1767 1769 _(b"index %s is corrupted") % self.display_id
1768 1770 )
1769 1771 self.index = index
1770 1772 # revnum -> (chain-length, sum-delta-length)
1771 1773 self._chaininfocache = util.lrucachedict(500)
1772 1774
1773 1775 return chunkcache
1774 1776
1775 1777 def _load_inner(self, chunk_cache):
1776 1778 if self._docket is None:
1777 1779 default_compression_header = None
1778 1780 else:
1779 1781 default_compression_header = self._docket.default_compression_header
1780 1782
1781 1783 self._inner = _InnerRevlog(
1782 1784 opener=self.opener,
1783 1785 index=self.index,
1784 1786 index_file=self._indexfile,
1785 1787 data_file=self._datafile,
1786 1788 sidedata_file=self._sidedatafile,
1787 1789 inline=self._inline,
1788 1790 data_config=self.data_config,
1789 1791 delta_config=self.delta_config,
1790 1792 feature_config=self.feature_config,
1791 1793 chunk_cache=chunk_cache,
1792 1794 default_compression_header=default_compression_header,
1793 1795 )
1794 1796
1795 1797 def get_revlog(self):
1796 1798 """simple function to mirror API of other not-really-revlog API"""
1797 1799 return self
1798 1800
1799 1801 @util.propertycache
1800 1802 def revlog_kind(self):
1801 1803 return self.target[0]
1802 1804
1803 1805 @util.propertycache
1804 1806 def display_id(self):
1805 1807 """The public-facing "ID" of the revlog that we use in messages"""
1806 1808 if self.revlog_kind == KIND_FILELOG:
1807 1809 # Reference the file without the "data/" prefix, so it is familiar
1808 1810 # to the user.
1809 1811 return self.target[1]
1810 1812 else:
1811 1813 return self.radix
1812 1814
1813 1815 def _datafp(self, mode=b'r'):
1814 1816 """file object for the revlog's data file"""
1815 1817 return self.opener(self._datafile, mode=mode)
1816 1818
1817 1819 def tiprev(self):
1818 1820 return len(self.index) - 1
1819 1821
1820 1822 def tip(self):
1821 1823 return self.node(self.tiprev())
1822 1824
1823 1825 def __contains__(self, rev):
1824 1826 return 0 <= rev < len(self)
1825 1827
1826 1828 def __len__(self):
1827 1829 return len(self.index)
1828 1830
1829 def __iter__(self):
1831 def __iter__(self) -> Iterator[int]:
1830 1832 return iter(range(len(self)))
1831 1833
1832 1834 def revs(self, start=0, stop=None):
1833 1835 """iterate over all rev in this revlog (from start to stop)"""
1834 1836 return storageutil.iterrevs(len(self), start=start, stop=stop)
1835 1837
1836 1838 def hasnode(self, node):
1837 1839 try:
1838 1840 self.rev(node)
1839 1841 return True
1840 1842 except KeyError:
1841 1843 return False
1842 1844
1843 1845 def _candelta(self, baserev, rev):
1844 1846 """whether two revisions (baserev, rev) can be delta-ed or not"""
1845 1847 # Disable delta if either rev requires a content-changing flag
1846 1848 # processor (ex. LFS). This is because such flag processor can alter
1847 1849 # the rawtext content that the delta will be based on, and two clients
1848 1850 # could have a same revlog node with different flags (i.e. different
1849 1851 # rawtext contents) and the delta could be incompatible.
1850 1852 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
1851 1853 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
1852 1854 ):
1853 1855 return False
1854 1856 return True
1855 1857
1856 1858 def update_caches(self, transaction):
1857 1859 """update on disk cache
1858 1860
1859 1861 If a transaction is passed, the update may be delayed to transaction
1860 1862 commit."""
1861 1863 if self._nodemap_file is not None:
1862 1864 if transaction is None:
1863 1865 nodemaputil.update_persistent_nodemap(self)
1864 1866 else:
1865 1867 nodemaputil.setup_persistent_nodemap(transaction, self)
1866 1868
1867 1869 def clearcaches(self):
1868 1870 """Clear in-memory caches"""
1869 1871 self._chainbasecache.clear()
1870 1872 self._inner.clear_cache()
1871 1873 self._pcache = {}
1872 1874 self._nodemap_docket = None
1873 1875 self.index.clearcaches()
1874 1876 # The python code is the one responsible for validating the docket, we
1875 1877 # end up having to refresh it here.
1876 1878 use_nodemap = (
1877 1879 not self._inline
1878 1880 and self._nodemap_file is not None
1879 1881 and hasattr(self.index, 'update_nodemap_data')
1880 1882 )
1881 1883 if use_nodemap:
1882 1884 nodemap_data = nodemaputil.persisted_data(self)
1883 1885 if nodemap_data is not None:
1884 1886 self._nodemap_docket = nodemap_data[0]
1885 1887 self.index.update_nodemap_data(*nodemap_data)
1886 1888
1887 1889 def rev(self, node):
1888 1890 """return the revision number associated with a <nodeid>"""
1889 1891 try:
1890 1892 return self.index.rev(node)
1891 1893 except TypeError:
1892 1894 raise
1893 1895 except error.RevlogError:
1894 1896 # parsers.c radix tree lookup failed
1895 1897 if (
1896 1898 node == self.nodeconstants.wdirid
1897 1899 or node in self.nodeconstants.wdirfilenodeids
1898 1900 ):
1899 1901 raise error.WdirUnsupported
1900 1902 raise error.LookupError(node, self.display_id, _(b'no node'))
1901 1903
1902 1904 # Accessors for index entries.
1903 1905
1904 1906 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
1905 1907 # are flags.
1906 1908 def start(self, rev):
1907 1909 return int(self.index[rev][0] >> 16)
1908 1910
1909 1911 def sidedata_cut_off(self, rev):
1910 1912 sd_cut_off = self.index[rev][8]
1911 1913 if sd_cut_off != 0:
1912 1914 return sd_cut_off
1913 1915 # This is some annoying dance, because entries without sidedata
1914 1916 # currently use 0 as their offset. (instead of previous-offset +
1915 1917 # previous-size)
1916 1918 #
1917 1919 # We should reconsider this sidedata → 0 sidedata_offset policy.
1918 1920 # In the meantime, we need this.
1919 1921 while 0 <= rev:
1920 1922 e = self.index[rev]
1921 1923 if e[9] != 0:
1922 1924 return e[8] + e[9]
1923 1925 rev -= 1
1924 1926 return 0
1925 1927
1926 1928 def flags(self, rev):
1927 1929 return self.index[rev][0] & 0xFFFF
1928 1930
1929 1931 def length(self, rev):
1930 1932 return self.index[rev][1]
1931 1933
1932 1934 def sidedata_length(self, rev):
1933 1935 if not self.feature_config.has_side_data:
1934 1936 return 0
1935 1937 return self.index[rev][9]
1936 1938
1937 1939 def rawsize(self, rev):
1938 1940 """return the length of the uncompressed text for a given revision"""
1939 1941 l = self.index[rev][2]
1940 1942 if l >= 0:
1941 1943 return l
1942 1944
1943 1945 t = self.rawdata(rev)
1944 1946 return len(t)
1945 1947
1946 1948 def size(self, rev):
1947 1949 """length of non-raw text (processed by a "read" flag processor)"""
1948 1950 # fast path: if no "read" flag processor could change the content,
1949 1951 # size is rawsize. note: ELLIPSIS is known to not change the content.
1950 1952 flags = self.flags(rev)
1951 1953 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
1952 1954 return self.rawsize(rev)
1953 1955
1954 1956 return len(self.revision(rev))
1955 1957
1956 1958 def fast_rank(self, rev):
1957 1959 """Return the rank of a revision if already known, or None otherwise.
1958 1960
1959 1961 The rank of a revision is the size of the sub-graph it defines as a
1960 1962 head. Equivalently, the rank of a revision `r` is the size of the set
1961 1963 `ancestors(r)`, `r` included.
1962 1964
1963 1965 This method returns the rank retrieved from the revlog in constant
1964 1966 time. It makes no attempt at computing unknown values for versions of
1965 1967 the revlog which do not persist the rank.
1966 1968 """
1967 1969 rank = self.index[rev][ENTRY_RANK]
1968 1970 if self._format_version != CHANGELOGV2 or rank == RANK_UNKNOWN:
1969 1971 return None
1970 1972 if rev == nullrev:
1971 1973 return 0 # convention
1972 1974 return rank
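fast_rank only reads a persisted value; when nothing is stored, the rank is by definition the size of the inclusive ancestor set. A self-contained sketch of that slow computation over a plain parent map (not the revlog index):

    def slow_rank(rev, parentrevs, nullrev=-1):
        # rank(r) == number of revisions in ancestors(r), r included
        seen = set()
        stack = [rev]
        while stack:
            r = stack.pop()
            if r == nullrev or r in seen:
                continue
            seen.add(r)
            stack.extend(parentrevs(r))
        return len(seen)

    # toy history: 0 <- 1 <- {2, 3}, and 4 merges 2 and 3
    parents = {0: (-1, -1), 1: (0, -1), 2: (1, -1), 3: (1, -1), 4: (2, 3)}
    assert slow_rank(4, lambda r: parents[r]) == 5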
1973 1975
1974 1976 def chainbase(self, rev):
1975 1977 base = self._chainbasecache.get(rev)
1976 1978 if base is not None:
1977 1979 return base
1978 1980
1979 1981 index = self.index
1980 1982 iterrev = rev
1981 1983 base = index[iterrev][3]
1982 1984 while base != iterrev:
1983 1985 iterrev = base
1984 1986 base = index[iterrev][3]
1985 1987
1986 1988 self._chainbasecache[rev] = base
1987 1989 return base
1988 1990
1989 1991 def linkrev(self, rev):
1990 1992 return self.index[rev][4]
1991 1993
1992 1994 def parentrevs(self, rev):
1993 1995 try:
1994 1996 entry = self.index[rev]
1995 1997 except IndexError:
1996 1998 if rev == wdirrev:
1997 1999 raise error.WdirUnsupported
1998 2000 raise
1999 2001
2000 2002 if self.feature_config.canonical_parent_order and entry[5] == nullrev:
2001 2003 return entry[6], entry[5]
2002 2004 else:
2003 2005 return entry[5], entry[6]
2004 2006
2005 2007 # fast parentrevs(rev) where rev isn't filtered
2006 2008 _uncheckedparentrevs = parentrevs
2007 2009
2008 2010 def node(self, rev):
2009 2011 try:
2010 2012 return self.index[rev][7]
2011 2013 except IndexError:
2012 2014 if rev == wdirrev:
2013 2015 raise error.WdirUnsupported
2014 2016 raise
2015 2017
2016 2018 # Derived from index values.
2017 2019
2018 2020 def end(self, rev):
2019 2021 return self.start(rev) + self.length(rev)
2020 2022
2021 2023 def parents(self, node):
2022 2024 i = self.index
2023 2025 d = i[self.rev(node)]
2024 2026 # inline node() to avoid function call overhead
2025 2027 if self.feature_config.canonical_parent_order and d[5] == self.nullid:
2026 2028 return i[d[6]][7], i[d[5]][7]
2027 2029 else:
2028 2030 return i[d[5]][7], i[d[6]][7]
2029 2031
2030 2032 def chainlen(self, rev):
2031 2033 return self._chaininfo(rev)[0]
2032 2034
2033 2035 def _chaininfo(self, rev):
2034 2036 chaininfocache = self._chaininfocache
2035 2037 if rev in chaininfocache:
2036 2038 return chaininfocache[rev]
2037 2039 index = self.index
2038 2040 generaldelta = self.delta_config.general_delta
2039 2041 iterrev = rev
2040 2042 e = index[iterrev]
2041 2043 clen = 0
2042 2044 compresseddeltalen = 0
2043 2045 while iterrev != e[3]:
2044 2046 clen += 1
2045 2047 compresseddeltalen += e[1]
2046 2048 if generaldelta:
2047 2049 iterrev = e[3]
2048 2050 else:
2049 2051 iterrev -= 1
2050 2052 if iterrev in chaininfocache:
2051 2053 t = chaininfocache[iterrev]
2052 2054 clen += t[0]
2053 2055 compresseddeltalen += t[1]
2054 2056 break
2055 2057 e = index[iterrev]
2056 2058 else:
2057 2059 # Add text length of base since decompressing that also takes
2058 2060 # work. For cache hits the length is already included.
2059 2061 compresseddeltalen += e[1]
2060 2062 r = (clen, compresseddeltalen)
2061 2063 chaininfocache[rev] = r
2062 2064 return r
2063 2065
2064 2066 def _deltachain(self, rev, stoprev=None):
2065 2067 return self._inner._deltachain(rev, stoprev=stoprev)
2066 2068
2067 2069 def ancestors(self, revs, stoprev=0, inclusive=False):
2068 2070 """Generate the ancestors of 'revs' in reverse revision order.
2069 2071 Does not generate revs lower than stoprev.
2070 2072
2071 2073 See the documentation for ancestor.lazyancestors for more details."""
2072 2074
2073 2075 # first, make sure start revisions aren't filtered
2074 2076 revs = list(revs)
2075 2077 checkrev = self.node
2076 2078 for r in revs:
2077 2079 checkrev(r)
2078 2080 # and we're sure ancestors aren't filtered as well
2079 2081
2080 2082 if rustancestor is not None and self.index.rust_ext_compat:
2081 2083 lazyancestors = rustancestor.LazyAncestors
2082 2084 arg = self.index
2083 2085 else:
2084 2086 lazyancestors = ancestor.lazyancestors
2085 2087 arg = self._uncheckedparentrevs
2086 2088 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
2087 2089
2088 2090 def descendants(self, revs):
2089 2091 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
2090 2092
2091 2093 def findcommonmissing(self, common=None, heads=None):
2092 2094 """Return a tuple of the ancestors of common and the ancestors of heads
2093 2095 that are not ancestors of common. In revset terminology, we return the
2094 2096 tuple:
2095 2097
2096 2098 ::common, (::heads) - (::common)
2097 2099
2098 2100 The list is sorted by revision number, meaning it is
2099 2101 topologically sorted.
2100 2102
2101 2103 'heads' and 'common' are both lists of node IDs. If heads is
2102 2104 not supplied, uses all of the revlog's heads. If common is not
2103 2105 supplied, uses nullid."""
2104 2106 if common is None:
2105 2107 common = [self.nullid]
2106 2108 if heads is None:
2107 2109 heads = self.heads()
2108 2110
2109 2111 common = [self.rev(n) for n in common]
2110 2112 heads = [self.rev(n) for n in heads]
2111 2113
2112 2114 # we want the ancestors, but inclusive
2113 2115 class lazyset:
2114 2116 def __init__(self, lazyvalues):
2115 2117 self.addedvalues = set()
2116 2118 self.lazyvalues = lazyvalues
2117 2119
2118 2120 def __contains__(self, value):
2119 2121 return value in self.addedvalues or value in self.lazyvalues
2120 2122
2121 2123 def __iter__(self):
2122 2124 added = self.addedvalues
2123 2125 for r in added:
2124 2126 yield r
2125 2127 for r in self.lazyvalues:
2126 2128 if not r in added:
2127 2129 yield r
2128 2130
2129 2131 def add(self, value):
2130 2132 self.addedvalues.add(value)
2131 2133
2132 2134 def update(self, values):
2133 2135 self.addedvalues.update(values)
2134 2136
2135 2137 has = lazyset(self.ancestors(common))
2136 2138 has.add(nullrev)
2137 2139 has.update(common)
2138 2140
2139 2141 # take all ancestors from heads that aren't in has
2140 2142 missing = set()
2141 2143 visit = collections.deque(r for r in heads if r not in has)
2142 2144 while visit:
2143 2145 r = visit.popleft()
2144 2146 if r in missing:
2145 2147 continue
2146 2148 else:
2147 2149 missing.add(r)
2148 2150 for p in self.parentrevs(r):
2149 2151 if p not in has:
2150 2152 visit.append(p)
2151 2153 missing = list(missing)
2152 2154 missing.sort()
2153 2155 return has, [self.node(miss) for miss in missing]
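The revset identity in the docstring, `::common, (::heads) - (::common)`, is easy to check on a toy DAG with plain revision numbers instead of nodes:

    def ancestors_inclusive(revs, parents):
        seen = set()
        stack = list(revs)
        while stack:
            r = stack.pop()
            if r < 0 or r in seen:
                continue
            seen.add(r)
            stack.extend(parents[r])
        return seen

    # 0 <- 1 <- 2 and 0 <- 3 (two branches off revision 0)
    parents = {0: (-1, -1), 1: (0, -1), 2: (1, -1), 3: (0, -1)}

    common, heads = [1], [2, 3]
    has = ancestors_inclusive(common, parents)            # ::common  -> {0, 1}
    missing = ancestors_inclusive(heads, parents) - has   # (::heads) - (::common)
    assert has == {0, 1} and missing == {2, 3}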
2154 2156
2155 2157 def incrementalmissingrevs(self, common=None):
2156 2158 """Return an object that can be used to incrementally compute the
2157 2159 revision numbers of the ancestors of arbitrary sets that are not
2158 2160 ancestors of common. This is an ancestor.incrementalmissingancestors
2159 2161 object.
2160 2162
2161 2163 'common' is a list of revision numbers. If common is not supplied, uses
2162 2164 nullrev.
2163 2165 """
2164 2166 if common is None:
2165 2167 common = [nullrev]
2166 2168
2167 2169 if rustancestor is not None and self.index.rust_ext_compat:
2168 2170 return rustancestor.MissingAncestors(self.index, common)
2169 2171 return ancestor.incrementalmissingancestors(self.parentrevs, common)
2170 2172
2171 2173 def findmissingrevs(self, common=None, heads=None):
2172 2174 """Return the revision numbers of the ancestors of heads that
2173 2175 are not ancestors of common.
2174 2176
2175 2177 More specifically, return a list of revision numbers corresponding to
2176 2178 nodes N such that every N satisfies the following constraints:
2177 2179
2178 2180 1. N is an ancestor of some node in 'heads'
2179 2181 2. N is not an ancestor of any node in 'common'
2180 2182
2181 2183 The list is sorted by revision number, meaning it is
2182 2184 topologically sorted.
2183 2185
2184 2186 'heads' and 'common' are both lists of revision numbers. If heads is
2185 2187 not supplied, uses all of the revlog's heads. If common is not
2186 2188 supplied, uses nullid."""
2187 2189 if common is None:
2188 2190 common = [nullrev]
2189 2191 if heads is None:
2190 2192 heads = self.headrevs()
2191 2193
2192 2194 inc = self.incrementalmissingrevs(common=common)
2193 2195 return inc.missingancestors(heads)
2194 2196
2195 2197 def findmissing(self, common=None, heads=None):
2196 2198 """Return the ancestors of heads that are not ancestors of common.
2197 2199
2198 2200 More specifically, return a list of nodes N such that every N
2199 2201 satisfies the following constraints:
2200 2202
2201 2203 1. N is an ancestor of some node in 'heads'
2202 2204 2. N is not an ancestor of any node in 'common'
2203 2205
2204 2206 The list is sorted by revision number, meaning it is
2205 2207 topologically sorted.
2206 2208
2207 2209 'heads' and 'common' are both lists of node IDs. If heads is
2208 2210 not supplied, uses all of the revlog's heads. If common is not
2209 2211 supplied, uses nullid."""
2210 2212 if common is None:
2211 2213 common = [self.nullid]
2212 2214 if heads is None:
2213 2215 heads = self.heads()
2214 2216
2215 2217 common = [self.rev(n) for n in common]
2216 2218 heads = [self.rev(n) for n in heads]
2217 2219
2218 2220 inc = self.incrementalmissingrevs(common=common)
2219 2221 return [self.node(r) for r in inc.missingancestors(heads)]
2220 2222
2221 2223 def nodesbetween(self, roots=None, heads=None):
2222 2224 """Return a topological path from 'roots' to 'heads'.
2223 2225
2224 2226 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
2225 2227 topologically sorted list of all nodes N that satisfy both of
2226 2228 these constraints:
2227 2229
2228 2230 1. N is a descendant of some node in 'roots'
2229 2231 2. N is an ancestor of some node in 'heads'
2230 2232
2231 2233 Every node is considered to be both a descendant and an ancestor
2232 2234 of itself, so every reachable node in 'roots' and 'heads' will be
2233 2235 included in 'nodes'.
2234 2236
2235 2237 'outroots' is the list of reachable nodes in 'roots', i.e., the
2236 2238 subset of 'roots' that is returned in 'nodes'. Likewise,
2237 2239 'outheads' is the subset of 'heads' that is also in 'nodes'.
2238 2240
2239 2241 'roots' and 'heads' are both lists of node IDs. If 'roots' is
2240 2242 unspecified, uses nullid as the only root. If 'heads' is
2241 2243 unspecified, uses list of all of the revlog's heads."""
2242 2244 nonodes = ([], [], [])
2243 2245 if roots is not None:
2244 2246 roots = list(roots)
2245 2247 if not roots:
2246 2248 return nonodes
2247 2249 lowestrev = min([self.rev(n) for n in roots])
2248 2250 else:
2249 2251 roots = [self.nullid] # Everybody's a descendant of nullid
2250 2252 lowestrev = nullrev
2251 2253 if (lowestrev == nullrev) and (heads is None):
2252 2254 # We want _all_ the nodes!
2253 2255 return (
2254 2256 [self.node(r) for r in self],
2255 2257 [self.nullid],
2256 2258 list(self.heads()),
2257 2259 )
2258 2260 if heads is None:
2259 2261 # All nodes are ancestors, so the latest ancestor is the last
2260 2262 # node.
2261 2263 highestrev = len(self) - 1
2262 2264 # Set ancestors to None to signal that every node is an ancestor.
2263 2265 ancestors = None
2264 2266 # Set heads to an empty dictionary for later discovery of heads
2265 2267 heads = {}
2266 2268 else:
2267 2269 heads = list(heads)
2268 2270 if not heads:
2269 2271 return nonodes
2270 2272 ancestors = set()
2271 2273 # Turn heads into a dictionary so we can remove 'fake' heads.
2272 2274 # Also, later we will be using it to filter out the heads we can't
2273 2275 # find from roots.
2274 2276 heads = dict.fromkeys(heads, False)
2275 2277 # Start at the top and keep marking parents until we're done.
2276 2278 nodestotag = set(heads)
2277 2279 # Remember where the top was so we can use it as a limit later.
2278 2280 highestrev = max([self.rev(n) for n in nodestotag])
2279 2281 while nodestotag:
2280 2282 # grab a node to tag
2281 2283 n = nodestotag.pop()
2282 2284 # Never tag nullid
2283 2285 if n == self.nullid:
2284 2286 continue
2285 2287 # A node's revision number represents its place in a
2286 2288 # topologically sorted list of nodes.
2287 2289 r = self.rev(n)
2288 2290 if r >= lowestrev:
2289 2291 if n not in ancestors:
2290 2292 # If we are possibly a descendant of one of the roots
2291 2293 # and we haven't already been marked as an ancestor
2292 2294 ancestors.add(n) # Mark as ancestor
2293 2295 # Add non-nullid parents to list of nodes to tag.
2294 2296 nodestotag.update(
2295 2297 [p for p in self.parents(n) if p != self.nullid]
2296 2298 )
2297 2299 elif n in heads: # We've seen it before, is it a fake head?
2298 2300 # So it is, real heads should not be the ancestors of
2299 2301 # any other heads.
2300 2302 heads.pop(n)
2301 2303 if not ancestors:
2302 2304 return nonodes
2303 2305 # Now that we have our set of ancestors, we want to remove any
2304 2306 # roots that are not ancestors.
2305 2307
2306 2308 # If one of the roots was nullid, everything is included anyway.
2307 2309 if lowestrev > nullrev:
2308 2310 # But, since we weren't, let's recompute the lowest rev to not
2309 2311 # include roots that aren't ancestors.
2310 2312
2311 2313 # Filter out roots that aren't ancestors of heads
2312 2314 roots = [root for root in roots if root in ancestors]
2313 2315 # Recompute the lowest revision
2314 2316 if roots:
2315 2317 lowestrev = min([self.rev(root) for root in roots])
2316 2318 else:
2317 2319 # No more roots? Return empty list
2318 2320 return nonodes
2319 2321 else:
2320 2322 # We are descending from nullid, and don't need to care about
2321 2323 # any other roots.
2322 2324 lowestrev = nullrev
2323 2325 roots = [self.nullid]
2324 2326 # Transform our roots list into a set.
2325 2327 descendants = set(roots)
2326 2328 # Also, keep the original roots so we can filter out roots that aren't
2327 2329 # 'real' roots (i.e. are descended from other roots).
2328 2330 roots = descendants.copy()
2329 2331 # Our topologically sorted list of output nodes.
2330 2332 orderedout = []
2331 2333 # Don't start at nullid since we don't want nullid in our output list,
2332 2334 # and if nullid shows up in descendants, empty parents will look like
2333 2335 # they're descendants.
2334 2336 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
2335 2337 n = self.node(r)
2336 2338 isdescendant = False
2337 2339 if lowestrev == nullrev: # Everybody is a descendant of nullid
2338 2340 isdescendant = True
2339 2341 elif n in descendants:
2340 2342 # n is already a descendant
2341 2343 isdescendant = True
2342 2344 # This check only needs to be done here because all the roots
2343 2345 # will start being marked as descendants before the loop.
2344 2346 if n in roots:
2345 2347 # If n was a root, check if it's a 'real' root.
2346 2348 p = tuple(self.parents(n))
2347 2349 # If any of its parents are descendants, it's not a root.
2348 2350 if (p[0] in descendants) or (p[1] in descendants):
2349 2351 roots.remove(n)
2350 2352 else:
2351 2353 p = tuple(self.parents(n))
2352 2354 # A node is a descendant if either of its parents is a
2353 2355 # descendant. (We seeded the descendants set with the roots
2354 2356 # up there, remember?)
2355 2357 if (p[0] in descendants) or (p[1] in descendants):
2356 2358 descendants.add(n)
2357 2359 isdescendant = True
2358 2360 if isdescendant and ((ancestors is None) or (n in ancestors)):
2359 2361 # Only include nodes that are both descendants and ancestors.
2360 2362 orderedout.append(n)
2361 2363 if (ancestors is not None) and (n in heads):
2362 2364 # We're trying to figure out which heads are reachable
2363 2365 # from roots.
2364 2366 # Mark this head as having been reached
2365 2367 heads[n] = True
2366 2368 elif ancestors is None:
2367 2369 # Otherwise, we're trying to discover the heads.
2368 2370 # Assume this is a head because if it isn't, the next step
2369 2371 # will eventually remove it.
2370 2372 heads[n] = True
2371 2373 # But, obviously its parents aren't.
2372 2374 for p in self.parents(n):
2373 2375 heads.pop(p, None)
2374 2376 heads = [head for head, flag in heads.items() if flag]
2375 2377 roots = list(roots)
2376 2378 assert orderedout
2377 2379 assert roots
2378 2380 assert heads
2379 2381 return (orderedout, roots, heads)
2380 2382
2381 2383 def headrevs(self, revs=None):
2382 2384 if revs is None:
2383 2385 try:
2384 2386 return self.index.headrevs()
2385 2387 except AttributeError:
2386 2388 return self._headrevs()
2387 2389 if rustdagop is not None and self.index.rust_ext_compat:
2388 2390 return rustdagop.headrevs(self.index, revs)
2389 2391 return dagop.headrevs(revs, self._uncheckedparentrevs)
2390 2392
2391 2393 def headrevsdiff(self, start, stop):
2392 2394 try:
2393 2395 return self.index.headrevsdiff(start, stop)
2394 2396 except AttributeError:
2395 2397 return dagop.headrevsdiff(self._uncheckedparentrevs, start, stop)
2396 2398
2397 2399 def computephases(self, roots):
2398 2400 return self.index.computephasesmapsets(roots)
2399 2401
2400 2402 def _headrevs(self):
2401 2403 count = len(self)
2402 2404 if not count:
2403 2405 return [nullrev]
2404 2406 # we won't iterate over filtered revs so nobody is a head at start
2405 2407 ishead = [0] * (count + 1)
2406 2408 index = self.index
2407 2409 for r in self:
2408 2410 ishead[r] = 1 # I may be a head
2409 2411 e = index[r]
2410 2412 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
2411 2413 return [r for r, val in enumerate(ishead) if val]
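The pure-Python fallback above starts by assuming every revision is a head and then clears any revision that appears as a parent. The same idea on a toy parent list:

    # parent pairs for a toy revlog: 0 <- 1 <- 2 and 0 <- 3
    parentrevs = [(-1, -1), (0, -1), (1, -1), (0, -1)]

    ishead = [1] * len(parentrevs)
    for p1, p2 in parentrevs:
        for p in (p1, p2):
            if p >= 0:
                ishead[p] = 0      # a revision with a child is not a head

    assert [r for r, flag in enumerate(ishead) if flag] == [2, 3]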
2412 2414
2413 2415 def _head_node_ids(self):
2414 2416 try:
2415 2417 return self.index.head_node_ids()
2416 2418 except AttributeError:
2417 2419 return [self.node(r) for r in self.headrevs()]
2418 2420
2419 2421 def heads(self, start=None, stop=None):
2420 2422 """return the list of all nodes that have no children
2421 2423
2422 2424 if start is specified, only heads that are descendants of
2423 2425 start will be returned
2424 2426 if stop is specified, it will consider all the revs from stop
2425 2427 as if they had no children
2426 2428 """
2427 2429 if start is None and stop is None:
2428 2430 if not len(self):
2429 2431 return [self.nullid]
2430 2432 return self._head_node_ids()
2431 2433 if start is None:
2432 2434 start = nullrev
2433 2435 else:
2434 2436 start = self.rev(start)
2435 2437
2436 2438 stoprevs = {self.rev(n) for n in stop or []}
2437 2439
2438 2440 revs = dagop.headrevssubset(
2439 2441 self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
2440 2442 )
2441 2443
2442 2444 return [self.node(rev) for rev in revs]
2443 2445
2444 2446 def diffheads(self, start, stop):
2445 2447 """return the nodes that make up the difference between
2446 2448 heads of revs before `start` and heads of revs before `stop`"""
2447 2449 removed, added = self.headrevsdiff(start, stop)
2448 2450 return [self.node(r) for r in removed], [self.node(r) for r in added]
2449 2451
2450 2452 def children(self, node):
2451 2453 """find the children of a given node"""
2452 2454 c = []
2453 2455 p = self.rev(node)
2454 2456 for r in self.revs(start=p + 1):
2455 2457 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
2456 2458 if prevs:
2457 2459 for pr in prevs:
2458 2460 if pr == p:
2459 2461 c.append(self.node(r))
2460 2462 elif p == nullrev:
2461 2463 c.append(self.node(r))
2462 2464 return c
2463 2465
2464 2466 def commonancestorsheads(self, a, b):
2465 2467 """calculate all the heads of the common ancestors of nodes a and b"""
2466 2468 a, b = self.rev(a), self.rev(b)
2467 2469 ancs = self._commonancestorsheads(a, b)
2468 2470 return pycompat.maplist(self.node, ancs)
2469 2471
2470 2472 def _commonancestorsheads(self, *revs):
2471 2473 """calculate all the heads of the common ancestors of revs"""
2472 2474 try:
2473 2475 ancs = self.index.commonancestorsheads(*revs)
2474 2476 except (AttributeError, OverflowError): # C implementation failed
2475 2477 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
2476 2478 return ancs
2477 2479
2478 2480 def isancestor(self, a, b):
2479 2481 """return True if node a is an ancestor of node b
2480 2482
2481 2483 A revision is considered an ancestor of itself."""
2482 2484 a, b = self.rev(a), self.rev(b)
2483 2485 return self.isancestorrev(a, b)
2484 2486
2485 2487 def isancestorrev(self, a, b):
2486 2488 """return True if revision a is an ancestor of revision b
2487 2489
2488 2490 A revision is considered an ancestor of itself.
2489 2491
2490 2492 The implementation of this is trivial but the use of
2491 2493 reachableroots is not."""
2492 2494 if a == nullrev:
2493 2495 return True
2494 2496 elif a == b:
2495 2497 return True
2496 2498 elif a > b:
2497 2499 return False
2498 2500 return bool(self.reachableroots(a, [b], [a], includepath=False))
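The early `a > b` return works because revision numbers follow topological order, so an ancestor can never have a larger number than its descendant. A minimal reachability check over a toy parent map makes the remaining case concrete (this is a sketch, not how reachableroots is implemented):

    def is_ancestor_rev(a, b, parents, nullrev=-1):
        # walk b's ancestry; we can prune anything below `a` because
        # ancestors always have smaller revision numbers
        if a == nullrev or a == b:
            return True
        if a > b:
            return False
        stack, seen = [b], set()
        while stack:
            r = stack.pop()
            if r == a:
                return True
            if r <= a or r in seen:
                continue
            seen.add(r)
            stack.extend(parents[r])
        return False

    parents = {0: (-1, -1), 1: (0, -1), 2: (1, -1), 3: (0, -1)}
    assert is_ancestor_rev(1, 2, parents) and not is_ancestor_rev(1, 3, parents)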
2499 2501
2500 2502 def reachableroots(self, minroot, heads, roots, includepath=False):
2501 2503 """return (heads(::(<roots> and <roots>::<heads>)))
2502 2504
2503 2505 If includepath is True, return (<roots>::<heads>)."""
2504 2506 try:
2505 2507 return self.index.reachableroots2(
2506 2508 minroot, heads, roots, includepath
2507 2509 )
2508 2510 except AttributeError:
2509 2511 return dagop._reachablerootspure(
2510 2512 self.parentrevs, minroot, roots, heads, includepath
2511 2513 )
2512 2514
2513 2515 def ancestor(self, a, b):
2514 2516 """calculate the "best" common ancestor of nodes a and b"""
2515 2517
2516 2518 a, b = self.rev(a), self.rev(b)
2517 2519 try:
2518 2520 ancs = self.index.ancestors(a, b)
2519 2521 except (AttributeError, OverflowError):
2520 2522 ancs = ancestor.ancestors(self.parentrevs, a, b)
2521 2523 if ancs:
2522 2524 # choose a consistent winner when there's a tie
2523 2525 return min(map(self.node, ancs))
2524 2526 return self.nullid
2525 2527
2526 2528 def _match(self, id):
2527 2529 if isinstance(id, int):
2528 2530 # rev
2529 2531 return self.node(id)
2530 2532 if len(id) == self.nodeconstants.nodelen:
2531 2533 # possibly a binary node
2532 2534 # odds of a binary node being all hex in ASCII are 1 in 10**25
2533 2535 try:
2534 2536 node = id
2535 2537 self.rev(node) # quick search the index
2536 2538 return node
2537 2539 except error.LookupError:
2538 2540 pass # may be partial hex id
2539 2541 try:
2540 2542 # str(rev)
2541 2543 rev = int(id)
2542 2544 if b"%d" % rev != id:
2543 2545 raise ValueError
2544 2546 if rev < 0:
2545 2547 rev = len(self) + rev
2546 2548 if rev < 0 or rev >= len(self):
2547 2549 raise ValueError
2548 2550 return self.node(rev)
2549 2551 except (ValueError, OverflowError):
2550 2552 pass
2551 2553 if len(id) == 2 * self.nodeconstants.nodelen:
2552 2554 try:
2553 2555 # a full hex nodeid?
2554 2556 node = bin(id)
2555 2557 self.rev(node)
2556 2558 return node
2557 2559 except (binascii.Error, error.LookupError):
2558 2560 pass
2559 2561
2560 2562 def _partialmatch(self, id):
2561 2563 # we don't care about wdirfilenodeids as they should always be full hashes
2562 2564 maybewdir = self.nodeconstants.wdirhex.startswith(id)
2563 2565 ambiguous = False
2564 2566 try:
2565 2567 partial = self.index.partialmatch(id)
2566 2568 if partial and self.hasnode(partial):
2567 2569 if maybewdir:
2568 2570 # single 'ff...' match in radix tree, ambiguous with wdir
2569 2571 ambiguous = True
2570 2572 else:
2571 2573 return partial
2572 2574 elif maybewdir:
2573 2575 # no 'ff...' match in radix tree, wdir identified
2574 2576 raise error.WdirUnsupported
2575 2577 else:
2576 2578 return None
2577 2579 except error.RevlogError:
2578 2580 # parsers.c radix tree lookup gave multiple matches
2579 2581 # fast path: for unfiltered changelog, radix tree is accurate
2580 2582 if not getattr(self, 'filteredrevs', None):
2581 2583 ambiguous = True
2582 2584 # fall through to slow path that filters hidden revisions
2583 2585 except (AttributeError, ValueError):
2584 2586 # we are pure python, or key is not hex
2585 2587 pass
2586 2588 if ambiguous:
2587 2589 raise error.AmbiguousPrefixLookupError(
2588 2590 id, self.display_id, _(b'ambiguous identifier')
2589 2591 )
2590 2592
2591 2593 if id in self._pcache:
2592 2594 return self._pcache[id]
2593 2595
2594 2596 if len(id) <= 40:
2595 2597 # hex(node)[:...]
2596 2598 l = len(id) // 2 * 2 # grab an even number of digits
2597 2599 try:
2598 2600 # we're dropping the last digit, so let's check that it's hex,
2599 2601 # to avoid the expensive computation below if it's not
2600 2602 if len(id) % 2 > 0:
2601 2603 if not (id[-1] in hexdigits):
2602 2604 return None
2603 2605 prefix = bin(id[:l])
2604 2606 except binascii.Error:
2605 2607 pass
2606 2608 else:
2607 2609 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
2608 2610 nl = [
2609 2611 n for n in nl if hex(n).startswith(id) and self.hasnode(n)
2610 2612 ]
2611 2613 if self.nodeconstants.nullhex.startswith(id):
2612 2614 nl.append(self.nullid)
2613 2615 if len(nl) > 0:
2614 2616 if len(nl) == 1 and not maybewdir:
2615 2617 self._pcache[id] = nl[0]
2616 2618 return nl[0]
2617 2619 raise error.AmbiguousPrefixLookupError(
2618 2620 id, self.display_id, _(b'ambiguous identifier')
2619 2621 )
2620 2622 if maybewdir:
2621 2623 raise error.WdirUnsupported
2622 2624 return None
2623 2625
2624 2626 def lookup(self, id):
2625 2627 """locate a node based on:
2626 2628 - revision number or str(revision number)
2627 2629 - nodeid or subset of hex nodeid
2628 2630 """
2629 2631 n = self._match(id)
2630 2632 if n is not None:
2631 2633 return n
2632 2634 n = self._partialmatch(id)
2633 2635 if n:
2634 2636 return n
2635 2637
2636 2638 raise error.LookupError(id, self.display_id, _(b'no match found'))
2637 2639
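Illustrative sketch (not part of the source), assuming `rl` is an already-open, non-empty revlog and `hex` is `mercurial.node.hex`:

    n = rl.lookup(0)               # by integer revision number
    assert rl.lookup(b'0') == n    # by str(revision number), as bytes
    assert rl.lookup(hex(n)) == n  # by full hex node id
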
2638 2640 def shortest(self, node, minlength=1):
2639 2641 """Find the shortest unambiguous prefix that matches node."""
2640 2642
2641 2643 def isvalid(prefix):
2642 2644 try:
2643 2645 matchednode = self._partialmatch(prefix)
2644 2646 except error.AmbiguousPrefixLookupError:
2645 2647 return False
2646 2648 except error.WdirUnsupported:
2647 2649 # single 'ff...' match
2648 2650 return True
2649 2651 if matchednode is None:
2650 2652 raise error.LookupError(node, self.display_id, _(b'no node'))
2651 2653 return True
2652 2654
2653 2655 def maybewdir(prefix):
2654 2656 return all(c == b'f' for c in pycompat.iterbytestr(prefix))
2655 2657
2656 2658 hexnode = hex(node)
2657 2659
2658 2660 def disambiguate(hexnode, minlength):
2659 2661 """Disambiguate against wdirid."""
2660 2662 for length in range(minlength, len(hexnode) + 1):
2661 2663 prefix = hexnode[:length]
2662 2664 if not maybewdir(prefix):
2663 2665 return prefix
2664 2666
2665 2667 if not getattr(self, 'filteredrevs', None):
2666 2668 try:
2667 2669 length = max(self.index.shortest(node), minlength)
2668 2670 return disambiguate(hexnode, length)
2669 2671 except error.RevlogError:
2670 2672 if node != self.nodeconstants.wdirid:
2671 2673 raise error.LookupError(
2672 2674 node, self.display_id, _(b'no node')
2673 2675 )
2674 2676 except AttributeError:
2675 2677 # Fall through to pure code
2676 2678 pass
2677 2679
2678 2680 if node == self.nodeconstants.wdirid:
2679 2681 for length in range(minlength, len(hexnode) + 1):
2680 2682 prefix = hexnode[:length]
2681 2683 if isvalid(prefix):
2682 2684 return prefix
2683 2685
2684 2686 for length in range(minlength, len(hexnode) + 1):
2685 2687 prefix = hexnode[:length]
2686 2688 if isvalid(prefix):
2687 2689 return disambiguate(hexnode, length)
2688 2690
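Illustrative sketch (not part of the source), assuming `rl` is an already-open revlog, `node` one of its stored node ids, and `hex` is `mercurial.node.hex`:

    prefix = rl.shortest(node, minlength=4)
    # the result is an unambiguous prefix of the full hex id, at least 4 digits long
    assert hex(node).startswith(prefix) and len(prefix) >= 4
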
2689 2691 def cmp(self, node, text):
2690 2692 """compare text with a given file revision
2691 2693
2692 2694 returns True if text is different than what is stored.
2693 2695 """
2694 2696 p1, p2 = self.parents(node)
2695 2697 return storageutil.hashrevisionsha1(text, p1, p2) != node
2696 2698
2697 2699 def deltaparent(self, rev):
2698 2700 """return deltaparent of the given revision"""
2699 2701 base = self.index[rev][3]
2700 2702 if base == rev:
2701 2703 return nullrev
2702 2704 elif self.delta_config.general_delta:
2703 2705 return base
2704 2706 else:
2705 2707 return rev - 1
2706 2708
2707 2709 def issnapshot(self, rev):
2708 2710 """tells whether rev is a snapshot"""
2709 2711 ret = self._inner.issnapshot(rev)
2710 2712 self.issnapshot = self._inner.issnapshot
2711 2713 return ret
2712 2714
2713 2715 def snapshotdepth(self, rev):
2714 2716 """number of snapshot in the chain before this one"""
2715 2717 if not self.issnapshot(rev):
2716 2718 raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
2717 2719 return len(self._inner._deltachain(rev)[0]) - 1
2718 2720
2719 2721 def revdiff(self, rev1, rev2):
2720 2722 """return or calculate a delta between two revisions
2721 2723
2722 2724 The delta calculated is in binary form and is intended to be written to
2723 2725 revlog data directly. So this function needs raw revision data.
2724 2726 """
2725 2727 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
2726 2728 return bytes(self._inner._chunk(rev2))
2727 2729
2728 2730 return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))
2729 2731
2730 2732 def revision(self, nodeorrev):
2731 2733 """return an uncompressed revision of a given node or revision
2732 2734 number.
2733 2735 """
2734 2736 return self._revisiondata(nodeorrev)
2735 2737
2736 2738 def sidedata(self, nodeorrev):
2737 2739 """a map of extra data related to the changeset but not part of the hash
2738 2740
2739 2741 This function currently returns a dictionary. However, a more advanced
2740 2742 mapping object will likely be used in the future for more
2741 2743 efficient/lazy code.
2742 2744 """
2743 2745 # deal with <nodeorrev> argument type
2744 2746 if isinstance(nodeorrev, int):
2745 2747 rev = nodeorrev
2746 2748 else:
2747 2749 rev = self.rev(nodeorrev)
2748 2750 return self._sidedata(rev)
2749 2751
2750 2752 def _rawtext(self, node, rev):
2751 2753 """return the possibly unvalidated rawtext for a revision
2752 2754
2753 2755 returns (rev, rawtext, validated)
2754 2756 """
2755 2757 # Check if we have the entry in cache
2756 2758 # The cache entry looks like (node, rev, rawtext)
2757 2759 if self._inner._revisioncache:
2758 2760 if self._inner._revisioncache[0] == node:
2759 2761 return (rev, self._inner._revisioncache[2], True)
2760 2762
2761 2763 if rev is None:
2762 2764 rev = self.rev(node)
2763 2765
2764 2766 return self._inner.raw_text(node, rev)
2765 2767
2766 2768 def _revisiondata(self, nodeorrev, raw=False):
2767 2769 # deal with <nodeorrev> argument type
2768 2770 if isinstance(nodeorrev, int):
2769 2771 rev = nodeorrev
2770 2772 node = self.node(rev)
2771 2773 else:
2772 2774 node = nodeorrev
2773 2775 rev = None
2774 2776
2775 2777 # fast path the special `nullid` rev
2776 2778 if node == self.nullid:
2777 2779 return b""
2778 2780
2779 2781 # ``rawtext`` is the text as stored inside the revlog. Might be the
2780 2782 # revision or might need to be processed to retrieve the revision.
2781 2783 rev, rawtext, validated = self._rawtext(node, rev)
2782 2784
2783 2785 if raw and validated:
2784 2786 # if we don't want to process the raw text and that raw
2785 2787 # text is cached, we can exit early.
2786 2788 return rawtext
2787 2789 if rev is None:
2788 2790 rev = self.rev(node)
2789 2791 # the revlog's flag for this revision
2790 2792 # (usually alter its state or content)
2791 2793 flags = self.flags(rev)
2792 2794
2793 2795 if validated and flags == REVIDX_DEFAULT_FLAGS:
2794 2796 # no extra flags set, no flag processor runs, text = rawtext
2795 2797 return rawtext
2796 2798
2797 2799 if raw:
2798 2800 validatehash = flagutil.processflagsraw(self, rawtext, flags)
2799 2801 text = rawtext
2800 2802 else:
2801 2803 r = flagutil.processflagsread(self, rawtext, flags)
2802 2804 text, validatehash = r
2803 2805 if validatehash:
2804 2806 self.checkhash(text, node, rev=rev)
2805 2807 if not validated:
2806 2808 self._inner._revisioncache = (node, rev, rawtext)
2807 2809
2808 2810 return text
2809 2811
2810 2812 def _sidedata(self, rev):
2811 2813 """Return the sidedata for a given revision number."""
2812 2814 sidedata_end = None
2813 2815 if self._docket is not None:
2814 2816 sidedata_end = self._docket.sidedata_end
2815 2817 return self._inner.sidedata(rev, sidedata_end)
2816 2818
2817 2819 def rawdata(self, nodeorrev):
2818 2820 """return an uncompressed raw data of a given node or revision number."""
2819 2821 return self._revisiondata(nodeorrev, raw=True)
2820 2822
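To illustrate the revision()/rawdata() distinction (sketch only, assuming `rl` is an already-open revlog and `node` an uncensored stored node id):

    raw = rl.rawdata(node)    # bytes exactly as stored, before flag processors run
    text = rl.revision(node)  # text after flag processors have been applied
    # with no flags set on the revision, the two results are identical
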
2821 2823 def hash(self, text, p1, p2):
2822 2824 """Compute a node hash.
2823 2825
2824 2826 Available as a function so that subclasses can replace the hash
2825 2827 as needed.
2826 2828 """
2827 2829 return storageutil.hashrevisionsha1(text, p1, p2)
2828 2830
2829 2831 def checkhash(self, text, node, p1=None, p2=None, rev=None):
2830 2832 """Check node hash integrity.
2831 2833
2832 2834 Available as a function so that subclasses can extend hash mismatch
2833 2835 behaviors as needed.
2834 2836 """
2835 2837 try:
2836 2838 if p1 is None and p2 is None:
2837 2839 p1, p2 = self.parents(node)
2838 2840 if node != self.hash(text, p1, p2):
2839 2841 # Clear the revision cache on hash failure. The revision cache
2840 2842 # only stores the raw revision and clearing the cache does have
2841 2843 # the side-effect that we won't have a cache hit when the raw
2842 2844 # revision data is accessed. But this case should be rare and
2843 2845 # it is extra work to teach the cache about the hash
2844 2846 # verification state.
2845 2847 if (
2846 2848 self._inner._revisioncache
2847 2849 and self._inner._revisioncache[0] == node
2848 2850 ):
2849 2851 self._inner._revisioncache = None
2850 2852
2851 2853 revornode = rev
2852 2854 if revornode is None:
2853 2855 revornode = templatefilters.short(hex(node))
2854 2856 raise error.RevlogError(
2855 2857 _(b"integrity check failed on %s:%s")
2856 2858 % (self.display_id, pycompat.bytestr(revornode))
2857 2859 )
2858 2860 except error.RevlogError:
2859 2861 if self.feature_config.censorable and storageutil.iscensoredtext(
2860 2862 text
2861 2863 ):
2862 2864 raise error.CensoredNodeError(self.display_id, node, text)
2863 2865 raise
2864 2866
2865 2867 @property
2866 2868 def _split_index_file(self):
2867 2869 """the path where to expect the index of an ongoing splitting operation
2868 2870
2869 2871 The file will only exist if a splitting operation is in progress, but
2870 2872 it is always expected at the same location."""
2871 2873 parts = self.radix.split(b'/')
2872 2874 if len(parts) > 1:
2873 2875 # adds a '-s' suffix to the 'data' or 'meta' base directory
2874 2876 head = parts[0] + b'-s'
2875 2877 mids = parts[1:-1]
2876 2878 tail = parts[-1] + b'.i'
2877 2879 pieces = [head] + mids + [tail]
2878 2880 return b'/'.join(pieces)
2879 2881 else:
2880 2882 # the revlog is stored at the root of the store (changelog or
2881 2883 # manifest), no risk of collision.
2882 2884 return self.radix + b'.i.s'
2883 2885
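The naming scheme can be shown with a standalone sketch (illustrative only; the real property operates on `self.radix`):

    def split_index_path(radix):
        parts = radix.split(b'/')
        if len(parts) > 1:
            # revlogs under data/ or meta/: b'data/foo' -> b'data-s/foo.i'
            head = parts[0] + b'-s'
            return b'/'.join([head] + parts[1:-1] + [parts[-1] + b'.i'])
        # revlogs at the store root (changelog, manifest): b'00changelog' -> b'00changelog.i.s'
        return radix + b'.i.s'

    assert split_index_path(b'data/foo') == b'data-s/foo.i'
    assert split_index_path(b'00changelog') == b'00changelog.i.s'
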
2884 2886 def _enforceinlinesize(self, tr):
2885 2887 """Check if the revlog is too big for inline and convert if so.
2886 2888
2887 2889 This should be called after revisions are added to the revlog. If the
2888 2890 revlog has grown too large to be an inline revlog, it will convert it
2889 2891 to use multiple index and data files.
2890 2892 """
2891 2893 tiprev = len(self) - 1
2892 2894 total_size = self.start(tiprev) + self.length(tiprev)
2893 2895 if not self._inline or (self._may_inline and total_size < _maxinline):
2894 2896 return
2895 2897
2896 2898 if self._docket is not None:
2897 2899 msg = b"inline revlog should not have a docket"
2898 2900 raise error.ProgrammingError(msg)
2899 2901
2900 2902 # In the common case, we enforce the inline size because the revlog has
2901 2903 # been appended to. In such a case, it must have an initial offset
2902 2904 # recorded in the transaction.
2903 2905 troffset = tr.findoffset(self._inner.canonical_index_file)
2904 2906 pre_touched = troffset is not None
2905 2907 if not pre_touched and self.target[0] != KIND_CHANGELOG:
2906 2908 raise error.RevlogError(
2907 2909 _(b"%s not found in the transaction") % self._indexfile
2908 2910 )
2909 2911
2910 2912 tr.addbackup(self._inner.canonical_index_file, for_offset=pre_touched)
2911 2913 tr.add(self._datafile, 0)
2912 2914
2913 2915 new_index_file_path = None
2914 2916 old_index_file_path = self._indexfile
2915 2917 new_index_file_path = self._split_index_file
2916 2918 opener = self.opener
2917 2919 weak_self = weakref.ref(self)
2918 2920
2919 2921 # the "split" index replace the real index when the transaction is
2920 2922 # finalized
2921 2923 def finalize_callback(tr):
2922 2924 opener.rename(
2923 2925 new_index_file_path,
2924 2926 old_index_file_path,
2925 2927 checkambig=True,
2926 2928 )
2927 2929 maybe_self = weak_self()
2928 2930 if maybe_self is not None:
2929 2931 maybe_self._indexfile = old_index_file_path
2930 2932 maybe_self._inner.index_file = maybe_self._indexfile
2931 2933
2932 2934 def abort_callback(tr):
2933 2935 maybe_self = weak_self()
2934 2936 if maybe_self is not None:
2935 2937 maybe_self._indexfile = old_index_file_path
2936 2938 maybe_self._inner.inline = True
2937 2939 maybe_self._inner.index_file = old_index_file_path
2938 2940
2939 2941 tr.registertmp(new_index_file_path)
2940 2942 # we use 001 here to make sure this happens after the finalisation of
2941 2943 # the pending changelog write (using 000). Otherwise the two finalizers
2942 2944 # would step over each other and delete the changelog.i file.
2943 2945 if self.target[1] is not None:
2944 2946 callback_id = b'001-revlog-split-%d-%s' % self.target
2945 2947 else:
2946 2948 callback_id = b'001-revlog-split-%d' % self.target[0]
2947 2949 tr.addfinalize(callback_id, finalize_callback)
2948 2950 tr.addabort(callback_id, abort_callback)
2949 2951
2950 2952 self._format_flags &= ~FLAG_INLINE_DATA
2951 2953 self._inner.split_inline(
2952 2954 tr,
2953 2955 self._format_flags | self._format_version,
2954 2956 new_index_file_path=new_index_file_path,
2955 2957 )
2956 2958
2957 2959 self._inline = False
2958 2960 if new_index_file_path is not None:
2959 2961 self._indexfile = new_index_file_path
2960 2962
2961 2963 nodemaputil.setup_persistent_nodemap(tr, self)
2962 2964
2963 2965 def _nodeduplicatecallback(self, transaction, node):
2964 2966 """called when trying to add a node already stored."""
2965 2967
2966 2968 @contextlib.contextmanager
2967 2969 def reading(self):
2968 2970 with self._inner.reading():
2969 2971 yield
2970 2972
2971 2973 @contextlib.contextmanager
2972 2974 def _writing(self, transaction):
2973 2975 if self._trypending:
2974 2976 msg = b'try to write in a `trypending` revlog: %s'
2975 2977 msg %= self.display_id
2976 2978 raise error.ProgrammingError(msg)
2977 2979 if self._inner.is_writing:
2978 2980 yield
2979 2981 else:
2980 2982 data_end = None
2981 2983 sidedata_end = None
2982 2984 if self._docket is not None:
2983 2985 data_end = self._docket.data_end
2984 2986 sidedata_end = self._docket.sidedata_end
2985 2987 with self._inner.writing(
2986 2988 transaction,
2987 2989 data_end=data_end,
2988 2990 sidedata_end=sidedata_end,
2989 2991 ):
2990 2992 yield
2991 2993 if self._docket is not None:
2992 2994 self._write_docket(transaction)
2993 2995
2994 2996 @property
2995 2997 def is_delaying(self):
2996 2998 return self._inner.is_delaying
2997 2999
2998 3000 def _write_docket(self, transaction):
2999 3001 """write the current docket on disk
3000 3002
3001 3003 Exists as a method to help the changelog implement transaction logic
3002 3004
3003 3005 We could also imagine using the same transaction logic for all revlogs
3004 3006 since dockets are cheap.
3005 3007 self._docket.write(transaction)
3006 3008
3007 3009 def addrevision(
3008 3010 self,
3009 3011 text,
3010 3012 transaction,
3011 3013 link,
3012 3014 p1,
3013 3015 p2,
3014 3016 cachedelta=None,
3015 3017 node=None,
3016 3018 flags=REVIDX_DEFAULT_FLAGS,
3017 3019 deltacomputer=None,
3018 3020 sidedata=None,
3019 3021 ):
3020 3022 """add a revision to the log
3021 3023
3022 3024 text - the revision data to add
3023 3025 transaction - the transaction object used for rollback
3024 3026 link - the linkrev data to add
3025 3027 p1, p2 - the parent nodeids of the revision
3026 3028 cachedelta - an optional precomputed delta
3027 3029 node - nodeid of revision; typically node is not specified, and it is
3028 3030 computed by default as hash(text, p1, p2); however, subclasses might
3029 3031 use a different hashing method (and override checkhash() in such a case)
3030 3032 flags - the known flags to set on the revision
3031 3033 deltacomputer - an optional deltacomputer instance shared between
3032 3034 multiple calls
3033 3035 """
3034 3036 if link == nullrev:
3035 3037 raise error.RevlogError(
3036 3038 _(b"attempted to add linkrev -1 to %s") % self.display_id
3037 3039 )
3038 3040
3039 3041 if sidedata is None:
3040 3042 sidedata = {}
3041 3043 elif sidedata and not self.feature_config.has_side_data:
3042 3044 raise error.ProgrammingError(
3043 3045 _(b"trying to add sidedata to a revlog who don't support them")
3044 3046 )
3045 3047
3046 3048 if flags:
3047 3049 node = node or self.hash(text, p1, p2)
3048 3050
3049 3051 rawtext, validatehash = flagutil.processflagswrite(self, text, flags)
3050 3052
3051 3053 # If the flag processor modifies the revision data, ignore any provided
3052 3054 # cachedelta.
3053 3055 if rawtext != text:
3054 3056 cachedelta = None
3055 3057
3056 3058 if len(rawtext) > _maxentrysize:
3057 3059 raise error.RevlogError(
3058 3060 _(
3059 3061 b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
3060 3062 )
3061 3063 % (self.display_id, len(rawtext))
3062 3064 )
3063 3065
3064 3066 node = node or self.hash(rawtext, p1, p2)
3065 3067 rev = self.index.get_rev(node)
3066 3068 if rev is not None:
3067 3069 return rev
3068 3070
3069 3071 if validatehash:
3070 3072 self.checkhash(rawtext, node, p1=p1, p2=p2)
3071 3073
3072 3074 return self.addrawrevision(
3073 3075 rawtext,
3074 3076 transaction,
3075 3077 link,
3076 3078 p1,
3077 3079 p2,
3078 3080 node,
3079 3081 flags,
3080 3082 cachedelta=cachedelta,
3081 3083 deltacomputer=deltacomputer,
3082 3084 sidedata=sidedata,
3083 3085 )
3084 3086
3085 3087 def addrawrevision(
3086 3088 self,
3087 3089 rawtext,
3088 3090 transaction,
3089 3091 link,
3090 3092 p1,
3091 3093 p2,
3092 3094 node,
3093 3095 flags,
3094 3096 cachedelta=None,
3095 3097 deltacomputer=None,
3096 3098 sidedata=None,
3097 3099 ):
3098 3100 """add a raw revision with known flags, node and parents
3099 3101 useful when reusing a revision not stored in this revlog (ex: received
3100 3102 over the wire, or read from an external bundle).
3101 3103 """
3102 3104 with self._writing(transaction):
3103 3105 return self._addrevision(
3104 3106 node,
3105 3107 rawtext,
3106 3108 transaction,
3107 3109 link,
3108 3110 p1,
3109 3111 p2,
3110 3112 flags,
3111 3113 cachedelta,
3112 3114 deltacomputer=deltacomputer,
3113 3115 sidedata=sidedata,
3114 3116 )
3115 3117
3116 3118 def compress(self, data: bytes) -> Tuple[bytes, bytes]:
3117 3119 return self._inner.compress(data)
3118 3120
3119 3121 def decompress(self, data):
3120 3122 return self._inner.decompress(data)
3121 3123
3122 3124 def _addrevision(
3123 3125 self,
3124 3126 node,
3125 3127 rawtext,
3126 3128 transaction,
3127 3129 link,
3128 3130 p1,
3129 3131 p2,
3130 3132 flags,
3131 3133 cachedelta,
3132 3134 alwayscache=False,
3133 3135 deltacomputer=None,
3134 3136 sidedata=None,
3135 3137 ):
3136 3138 """internal function to add revisions to the log
3137 3139
3138 3140 see addrevision for argument descriptions.
3139 3141
3140 3142 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
3141 3143
3142 3144 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
3143 3145 be used.
3144 3146
3145 3147 invariants:
3146 3148 - rawtext is optional (can be None); if not set, cachedelta must be set.
3147 3149 if both are set, they must correspond to each other.
3148 3150 """
3149 3151 if node == self.nullid:
3150 3152 raise error.RevlogError(
3151 3153 _(b"%s: attempt to add null revision") % self.display_id
3152 3154 )
3153 3155 if (
3154 3156 node == self.nodeconstants.wdirid
3155 3157 or node in self.nodeconstants.wdirfilenodeids
3156 3158 ):
3157 3159 raise error.RevlogError(
3158 3160 _(b"%s: attempt to add wdir revision") % self.display_id
3159 3161 )
3160 3162 if self._inner._writinghandles is None:
3161 3163 msg = b'adding revision outside `revlog._writing` context'
3162 3164 raise error.ProgrammingError(msg)
3163 3165
3164 3166 btext = [rawtext]
3165 3167
3166 3168 curr = len(self)
3167 3169 prev = curr - 1
3168 3170
3169 3171 offset = self._get_data_offset(prev)
3170 3172
3171 3173 if self._concurrencychecker:
3172 3174 ifh, dfh, sdfh = self._inner._writinghandles
3173 3175 # XXX no checking for the sidedata file
3174 3176 if self._inline:
3175 3177 # offset is "as if" it were in the .d file, so we need to add on
3176 3178 # the size of the entry metadata.
3177 3179 self._concurrencychecker(
3178 3180 ifh, self._indexfile, offset + curr * self.index.entry_size
3179 3181 )
3180 3182 else:
3181 3183 # Entries in the .i are a consistent size.
3182 3184 self._concurrencychecker(
3183 3185 ifh, self._indexfile, curr * self.index.entry_size
3184 3186 )
3185 3187 self._concurrencychecker(dfh, self._datafile, offset)
3186 3188
3187 3189 p1r, p2r = self.rev(p1), self.rev(p2)
3188 3190
3189 3191 # full versions are inserted when the needed deltas
3190 3192 # become comparable to the uncompressed text
3191 3193 if rawtext is None:
3192 3194 # need the rawtext size before it is changed by flag processors, which
3193 3195 # is the non-raw size. use revlog explicitly to avoid filelog's extra
3194 3196 # logic that might remove metadata size.
3195 3197 textlen = mdiff.patchedsize(
3196 3198 revlog.size(self, cachedelta[0]), cachedelta[1]
3197 3199 )
3198 3200 else:
3199 3201 textlen = len(rawtext)
3200 3202
3201 3203 if deltacomputer is None:
3202 3204 write_debug = None
3203 3205 if self.delta_config.debug_delta:
3204 3206 write_debug = transaction._report
3205 3207 deltacomputer = deltautil.deltacomputer(
3206 3208 self, write_debug=write_debug
3207 3209 )
3208 3210
3209 3211 if cachedelta is not None and len(cachedelta) == 2:
3210 3212 # If the cached delta has no information about how it should be
3211 3213 # reused, add the default reuse instruction according to the
3212 3214 # revlog's configuration.
3213 3215 if (
3214 3216 self.delta_config.general_delta
3215 3217 and self.delta_config.lazy_delta_base
3216 3218 ):
3217 3219 delta_base_reuse = DELTA_BASE_REUSE_TRY
3218 3220 else:
3219 3221 delta_base_reuse = DELTA_BASE_REUSE_NO
3220 3222 cachedelta = (cachedelta[0], cachedelta[1], delta_base_reuse)
3221 3223
3222 3224 revinfo = revlogutils.revisioninfo(
3223 3225 node,
3224 3226 p1,
3225 3227 p2,
3226 3228 btext,
3227 3229 textlen,
3228 3230 cachedelta,
3229 3231 flags,
3230 3232 )
3231 3233
3232 3234 deltainfo = deltacomputer.finddeltainfo(revinfo)
3233 3235
3234 3236 compression_mode = COMP_MODE_INLINE
3235 3237 if self._docket is not None:
3236 3238 default_comp = self._docket.default_compression_header
3237 3239 r = deltautil.delta_compression(default_comp, deltainfo)
3238 3240 compression_mode, deltainfo = r
3239 3241
3240 3242 sidedata_compression_mode = COMP_MODE_INLINE
3241 3243 if sidedata and self.feature_config.has_side_data:
3242 3244 sidedata_compression_mode = COMP_MODE_PLAIN
3243 3245 serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
3244 3246 sidedata_offset = self._docket.sidedata_end
3245 3247 h, comp_sidedata = self._inner.compress(serialized_sidedata)
3246 3248 if (
3247 3249 h != b'u'
3248 3250 and comp_sidedata[0:1] != b'\0'
3249 3251 and len(comp_sidedata) < len(serialized_sidedata)
3250 3252 ):
3251 3253 assert not h
3252 3254 if (
3253 3255 comp_sidedata[0:1]
3254 3256 == self._docket.default_compression_header
3255 3257 ):
3256 3258 sidedata_compression_mode = COMP_MODE_DEFAULT
3257 3259 serialized_sidedata = comp_sidedata
3258 3260 else:
3259 3261 sidedata_compression_mode = COMP_MODE_INLINE
3260 3262 serialized_sidedata = comp_sidedata
3261 3263 else:
3262 3264 serialized_sidedata = b""
3263 3265 # Don't store the offset if the sidedata is empty; that way
3264 3266 # we can easily detect empty sidedata, and they will be no different
3265 3267 # from ones we add manually.
3266 3268 sidedata_offset = 0
3267 3269
3268 3270 rank = RANK_UNKNOWN
3269 3271 if self.feature_config.compute_rank:
3270 3272 if (p1r, p2r) == (nullrev, nullrev):
3271 3273 rank = 1
3272 3274 elif p1r != nullrev and p2r == nullrev:
3273 3275 rank = 1 + self.fast_rank(p1r)
3274 3276 elif p1r == nullrev and p2r != nullrev:
3275 3277 rank = 1 + self.fast_rank(p2r)
3276 3278 else: # merge node
3277 3279 if rustdagop is not None and self.index.rust_ext_compat:
3278 3280 rank = rustdagop.rank(self.index, p1r, p2r)
3279 3281 else:
3280 3282 pmin, pmax = sorted((p1r, p2r))
3281 3283 rank = 1 + self.fast_rank(pmax)
3282 3284 rank += sum(1 for _ in self.findmissingrevs([pmax], [pmin]))
3283 3285
3284 3286 e = revlogutils.entry(
3285 3287 flags=flags,
3286 3288 data_offset=offset,
3287 3289 data_compressed_length=deltainfo.deltalen,
3288 3290 data_uncompressed_length=textlen,
3289 3291 data_compression_mode=compression_mode,
3290 3292 data_delta_base=deltainfo.base,
3291 3293 link_rev=link,
3292 3294 parent_rev_1=p1r,
3293 3295 parent_rev_2=p2r,
3294 3296 node_id=node,
3295 3297 sidedata_offset=sidedata_offset,
3296 3298 sidedata_compressed_length=len(serialized_sidedata),
3297 3299 sidedata_compression_mode=sidedata_compression_mode,
3298 3300 rank=rank,
3299 3301 )
3300 3302
3301 3303 self.index.append(e)
3302 3304 entry = self.index.entry_binary(curr)
3303 3305 if curr == 0 and self._docket is None:
3304 3306 header = self._format_flags | self._format_version
3305 3307 header = self.index.pack_header(header)
3306 3308 entry = header + entry
3307 3309 self._writeentry(
3308 3310 transaction,
3309 3311 entry,
3310 3312 deltainfo.data,
3311 3313 link,
3312 3314 offset,
3313 3315 serialized_sidedata,
3314 3316 sidedata_offset,
3315 3317 )
3316 3318
3317 3319 rawtext = btext[0]
3318 3320
3319 3321 if alwayscache and rawtext is None:
3320 3322 rawtext = deltacomputer.buildtext(revinfo)
3321 3323
3322 3324 if type(rawtext) == bytes: # only accept immutable objects
3323 3325 self._inner._revisioncache = (node, curr, rawtext)
3324 3326 self._chainbasecache[curr] = deltainfo.chainbase
3325 3327 return curr
3326 3328
3327 3329 def _get_data_offset(self, prev):
3328 3330 """Returns the current offset in the (in-transaction) data file.
3329 3331 Versions < 2 of the revlog can get this in O(1); revlog v2 needs a docket
3330 3332 file to store that information: since sidedata can be rewritten to the
3331 3333 end of the data file within a transaction, you can have cases where, for
3332 3334 example, rev `n` does not have sidedata while rev `n - 1` does, leading
3333 3335 to `n - 1`'s sidedata being written after `n`'s data.
3334 3336
3335 3337 TODO cache this in a docket file before getting out of experimental."""
3336 3338 if self._docket is None:
3337 3339 return self.end(prev)
3338 3340 else:
3339 3341 return self._docket.data_end
3340 3342
3341 3343 def _writeentry(
3342 3344 self,
3343 3345 transaction,
3344 3346 entry,
3345 3347 data,
3346 3348 link,
3347 3349 offset,
3348 3350 sidedata,
3349 3351 sidedata_offset,
3350 3352 ):
3351 3353 # Files opened in a+ mode have inconsistent behavior on various
3352 3354 # platforms. Windows requires that a file positioning call be made
3353 3355 # when the file handle transitions between reads and writes. See
3354 3356 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
3355 3357 # platforms, Python or the platform itself can be buggy. Some versions
3356 3358 # of Solaris have been observed to not append at the end of the file
3357 3359 # if the file was seeked to before the end. See issue4943 for more.
3358 3360 #
3359 3361 # We work around this issue by inserting a seek() before writing.
3360 3362 # Note: This is likely not necessary on Python 3. However, because
3361 3363 # the file handle is reused for reads and may be seeked there, we need
3362 3364 # to be careful before changing this.
3363 3365 index_end = data_end = sidedata_end = None
3364 3366 if self._docket is not None:
3365 3367 index_end = self._docket.index_end
3366 3368 data_end = self._docket.data_end
3367 3369 sidedata_end = self._docket.sidedata_end
3368 3370
3369 3371 files_end = self._inner.write_entry(
3370 3372 transaction,
3371 3373 entry,
3372 3374 data,
3373 3375 link,
3374 3376 offset,
3375 3377 sidedata,
3376 3378 sidedata_offset,
3377 3379 index_end,
3378 3380 data_end,
3379 3381 sidedata_end,
3380 3382 )
3381 3383 self._enforceinlinesize(transaction)
3382 3384 if self._docket is not None:
3383 3385 self._docket.index_end = files_end[0]
3384 3386 self._docket.data_end = files_end[1]
3385 3387 self._docket.sidedata_end = files_end[2]
3386 3388
3387 3389 nodemaputil.setup_persistent_nodemap(transaction, self)
3388 3390
3389 3391 def addgroup(
3390 3392 self,
3391 3393 deltas,
3392 3394 linkmapper,
3393 3395 transaction,
3394 3396 alwayscache=False,
3395 3397 addrevisioncb=None,
3396 3398 duplicaterevisioncb=None,
3397 3399 debug_info=None,
3398 3400 delta_base_reuse_policy=None,
3399 3401 ):
3400 3402 """
3401 3403 add a delta group
3402 3404
3403 3405 given a set of deltas, add them to the revision log. the
3404 3406 first delta is against its parent, which should be in our
3405 3407 log, the rest are against the previous delta.
3406 3408
3407 3409 If ``addrevisioncb`` is defined, it will be called with arguments of
3408 3410 this revlog and the node that was added.
3409 3411 """
3410 3412
3411 3413 if self._adding_group:
3412 3414 raise error.ProgrammingError(b'cannot nest addgroup() calls')
3413 3415
3414 3416 # read the default delta-base reuse policy from revlog config if the
3415 3417 # group did not specify one.
3416 3418 if delta_base_reuse_policy is None:
3417 3419 if (
3418 3420 self.delta_config.general_delta
3419 3421 and self.delta_config.lazy_delta_base
3420 3422 ):
3421 3423 delta_base_reuse_policy = DELTA_BASE_REUSE_TRY
3422 3424 else:
3423 3425 delta_base_reuse_policy = DELTA_BASE_REUSE_NO
3424 3426
3425 3427 self._adding_group = True
3426 3428 empty = True
3427 3429 try:
3428 3430 with self._writing(transaction):
3429 3431 write_debug = None
3430 3432 if self.delta_config.debug_delta:
3431 3433 write_debug = transaction._report
3432 3434 deltacomputer = deltautil.deltacomputer(
3433 3435 self,
3434 3436 write_debug=write_debug,
3435 3437 debug_info=debug_info,
3436 3438 )
3437 3439 # loop through our set of deltas
3438 3440 for data in deltas:
3439 3441 (
3440 3442 node,
3441 3443 p1,
3442 3444 p2,
3443 3445 linknode,
3444 3446 deltabase,
3445 3447 delta,
3446 3448 flags,
3447 3449 sidedata,
3448 3450 ) = data
3449 3451 link = linkmapper(linknode)
3450 3452 flags = flags or REVIDX_DEFAULT_FLAGS
3451 3453
3452 3454 rev = self.index.get_rev(node)
3453 3455 if rev is not None:
3454 3456 # this can happen if two branches make the same change
3455 3457 self._nodeduplicatecallback(transaction, rev)
3456 3458 if duplicaterevisioncb:
3457 3459 duplicaterevisioncb(self, rev)
3458 3460 empty = False
3459 3461 continue
3460 3462
3461 3463 for p in (p1, p2):
3462 3464 if not self.index.has_node(p):
3463 3465 raise error.LookupError(
3464 3466 p, self.radix, _(b'unknown parent')
3465 3467 )
3466 3468
3467 3469 if not self.index.has_node(deltabase):
3468 3470 raise error.LookupError(
3469 3471 deltabase, self.display_id, _(b'unknown delta base')
3470 3472 )
3471 3473
3472 3474 baserev = self.rev(deltabase)
3473 3475
3474 3476 if baserev != nullrev and self.iscensored(baserev):
3475 3477 # if base is censored, delta must be full replacement in a
3476 3478 # single patch operation
3477 3479 hlen = struct.calcsize(b">lll")
3478 3480 oldlen = self.rawsize(baserev)
3479 3481 newlen = len(delta) - hlen
3480 3482 if delta[:hlen] != mdiff.replacediffheader(
3481 3483 oldlen, newlen
3482 3484 ):
3483 3485 raise error.CensoredBaseError(
3484 3486 self.display_id, self.node(baserev)
3485 3487 )
3486 3488
3487 3489 if not flags and self._peek_iscensored(baserev, delta):
3488 3490 flags |= REVIDX_ISCENSORED
3489 3491
3490 3492 # We assume consumers of addrevisioncb will want to retrieve
3491 3493 # the added revision, which will require a call to
3492 3494 # revision(). revision() will fast path if there is a cache
3493 3495 # hit. So, we tell _addrevision() to always cache in this case.
3494 3496 # We're only using addgroup() in the context of changegroup
3495 3497 # generation so the revision data can always be handled as raw
3496 3498 # by the flagprocessor.
3497 3499 rev = self._addrevision(
3498 3500 node,
3499 3501 None,
3500 3502 transaction,
3501 3503 link,
3502 3504 p1,
3503 3505 p2,
3504 3506 flags,
3505 3507 (baserev, delta, delta_base_reuse_policy),
3506 3508 alwayscache=alwayscache,
3507 3509 deltacomputer=deltacomputer,
3508 3510 sidedata=sidedata,
3509 3511 )
3510 3512
3511 3513 if addrevisioncb:
3512 3514 addrevisioncb(self, rev)
3513 3515 empty = False
3514 3516 finally:
3515 3517 self._adding_group = False
3516 3518 return not empty
3517 3519
3518 3520 def iscensored(self, rev):
3519 3521 """Check if a file revision is censored."""
3520 3522 if not self.feature_config.censorable:
3521 3523 return False
3522 3524
3523 3525 return self.flags(rev) & REVIDX_ISCENSORED
3524 3526
3525 3527 def _peek_iscensored(self, baserev, delta):
3526 3528 """Quickly check if a delta produces a censored revision."""
3527 3529 if not self.feature_config.censorable:
3528 3530 return False
3529 3531
3530 3532 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
3531 3533
3532 3534 def getstrippoint(self, minlink):
3533 3535 """find the minimum rev that must be stripped to strip the linkrev
3534 3536
3535 3537 Returns a tuple containing the minimum rev and a set of all revs that
3536 3538 have linkrevs that will be broken by this strip.
3537 3539 """
3538 3540 return storageutil.resolvestripinfo(
3539 3541 minlink,
3540 3542 len(self) - 1,
3541 3543 self.headrevs(),
3542 3544 self.linkrev,
3543 3545 self.parentrevs,
3544 3546 )
3545 3547
3546 3548 def strip(self, minlink, transaction):
3547 3549 """truncate the revlog on the first revision with a linkrev >= minlink
3548 3550
3549 3551 This function is called when we're stripping revision minlink and
3550 3552 its descendants from the repository.
3551 3553
3552 3554 We have to remove all revisions with linkrev >= minlink, because
3553 3555 the equivalent changelog revisions will be renumbered after the
3554 3556 strip.
3555 3557
3556 3558 So we truncate the revlog on the first of these revisions, and
3557 3559 trust that the caller has saved the revisions that shouldn't be
3558 3560 removed and that it'll re-add them after this truncation.
3559 3561 """
3560 3562 if len(self) == 0:
3561 3563 return
3562 3564
3563 3565 rev, _ = self.getstrippoint(minlink)
3564 3566 if rev == len(self):
3565 3567 return
3566 3568
3567 3569 # first truncate the files on disk
3568 3570 data_end = self.start(rev)
3569 3571 if not self._inline:
3570 3572 transaction.add(self._datafile, data_end)
3571 3573 end = rev * self.index.entry_size
3572 3574 else:
3573 3575 end = data_end + (rev * self.index.entry_size)
3574 3576
3575 3577 if self._sidedatafile:
3576 3578 sidedata_end = self.sidedata_cut_off(rev)
3577 3579 transaction.add(self._sidedatafile, sidedata_end)
3578 3580
3579 3581 transaction.add(self._indexfile, end)
3580 3582 if self._docket is not None:
3581 3583 # XXX we could leverage the docket while stripping. However, it is
3582 3584 # not powerful enough at the time of this comment
3583 3585 self._docket.index_end = end
3584 3586 self._docket.data_end = data_end
3585 3587 self._docket.sidedata_end = sidedata_end
3586 3588 self._docket.write(transaction, stripping=True)
3587 3589
3588 3590 # then reset internal state in memory to forget those revisions
3589 3591 self._chaininfocache = util.lrucachedict(500)
3590 3592 self._inner.clear_cache()
3591 3593
3592 3594 del self.index[rev:-1]
3593 3595
3594 3596 def checksize(self):
3595 3597 """Check size of index and data files
3596 3598
3597 3599 return a (dd, di) tuple.
3598 3600 - dd: extra bytes for the "data" file
3599 3601 - di: extra bytes for the "index" file
3600 3602
3601 3603 A healthy revlog will return (0, 0).
3602 3604 """
3603 3605 expected = 0
3604 3606 if len(self):
3605 3607 expected = max(0, self.end(len(self) - 1))
3606 3608
3607 3609 try:
3608 3610 with self._datafp() as f:
3609 3611 f.seek(0, io.SEEK_END)
3610 3612 actual = f.tell()
3611 3613 dd = actual - expected
3612 3614 except FileNotFoundError:
3613 3615 dd = 0
3614 3616
3615 3617 try:
3616 3618 f = self.opener(self._indexfile)
3617 3619 f.seek(0, io.SEEK_END)
3618 3620 actual = f.tell()
3619 3621 f.close()
3620 3622 s = self.index.entry_size
3621 3623 i = max(0, actual // s)
3622 3624 di = actual - (i * s)
3623 3625 if self._inline:
3624 3626 databytes = 0
3625 3627 for r in self:
3626 3628 databytes += max(0, self.length(r))
3627 3629 dd = 0
3628 3630 di = actual - len(self) * s - databytes
3629 3631 except FileNotFoundError:
3630 3632 di = 0
3631 3633
3632 3634 return (dd, di)
3633 3635
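For example (sketch only, assuming `rl` is an already-open revlog):

    dd, di = rl.checksize()
    if dd or di:
        print('revlog has %d stray data bytes and %d stray index bytes' % (dd, di))
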
3634 3636 def files(self):
3635 3637 """return list of files that compose this revlog"""
3636 3638 res = [self._indexfile]
3637 3639 if self._docket_file is None:
3638 3640 if not self._inline:
3639 3641 res.append(self._datafile)
3640 3642 else:
3641 3643 res.append(self._docket_file)
3642 3644 res.extend(self._docket.old_index_filepaths(include_empty=False))
3643 3645 if self._docket.data_end:
3644 3646 res.append(self._datafile)
3645 3647 res.extend(self._docket.old_data_filepaths(include_empty=False))
3646 3648 if self._docket.sidedata_end:
3647 3649 res.append(self._sidedatafile)
3648 3650 res.extend(self._docket.old_sidedata_filepaths(include_empty=False))
3649 3651 return res
3650 3652
3651 3653 def emitrevisions(
3652 3654 self,
3653 3655 nodes,
3654 3656 nodesorder=None,
3655 3657 revisiondata=False,
3656 3658 assumehaveparentrevisions=False,
3657 3659 deltamode=repository.CG_DELTAMODE_STD,
3658 3660 sidedata_helpers=None,
3659 3661 debug_info=None,
3660 3662 ):
3661 3663 if nodesorder not in (b'nodes', b'storage', b'linear', None):
3662 3664 raise error.ProgrammingError(
3663 3665 b'unhandled value for nodesorder: %s' % nodesorder
3664 3666 )
3665 3667
3666 3668 if nodesorder is None and not self.delta_config.general_delta:
3667 3669 nodesorder = b'storage'
3668 3670
3669 3671 if (
3670 3672 not self._storedeltachains
3671 3673 and deltamode != repository.CG_DELTAMODE_PREV
3672 3674 ):
3673 3675 deltamode = repository.CG_DELTAMODE_FULL
3674 3676
3675 3677 return storageutil.emitrevisions(
3676 3678 self,
3677 3679 nodes,
3678 3680 nodesorder,
3679 3681 revlogrevisiondelta,
3680 3682 deltaparentfn=self.deltaparent,
3681 3683 candeltafn=self._candelta,
3682 3684 rawsizefn=self.rawsize,
3683 3685 revdifffn=self.revdiff,
3684 3686 flagsfn=self.flags,
3685 3687 deltamode=deltamode,
3686 3688 revisiondata=revisiondata,
3687 3689 assumehaveparentrevisions=assumehaveparentrevisions,
3688 3690 sidedata_helpers=sidedata_helpers,
3689 3691 debug_info=debug_info,
3690 3692 )
3691 3693
3692 3694 DELTAREUSEALWAYS = b'always'
3693 3695 DELTAREUSESAMEREVS = b'samerevs'
3694 3696 DELTAREUSENEVER = b'never'
3695 3697
3696 3698 DELTAREUSEFULLADD = b'fulladd'
3697 3699
3698 3700 DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}
3699 3701
3700 3702 def clone(
3701 3703 self,
3702 3704 tr,
3703 3705 destrevlog,
3704 3706 addrevisioncb=None,
3705 3707 deltareuse=DELTAREUSESAMEREVS,
3706 3708 forcedeltabothparents=None,
3707 3709 sidedata_helpers=None,
3708 3710 ):
3709 3711 """Copy this revlog to another, possibly with format changes.
3710 3712
3711 3713 The destination revlog will contain the same revisions and nodes.
3712 3714 However, it may not be bit-for-bit identical due to e.g. delta encoding
3713 3715 differences.
3714 3716
3715 3717 The ``deltareuse`` argument controls how deltas from the existing revlog
3716 3718 are preserved in the destination revlog. The argument can have the
3717 3719 following values:
3718 3720
3719 3721 DELTAREUSEALWAYS
3720 3722 Deltas will always be reused (if possible), even if the destination
3721 3723 revlog would not select the same revisions for the delta. This is the
3722 3724 fastest mode of operation.
3723 3725 DELTAREUSESAMEREVS
3724 3726 Deltas will be reused if the destination revlog would pick the same
3725 3727 revisions for the delta. This mode strikes a balance between speed
3726 3728 and optimization.
3727 3729 DELTAREUSENEVER
3728 3730 Deltas will never be reused. This is the slowest mode of execution.
3729 3731 This mode can be used to recompute deltas (e.g. if the diff/delta
3730 3732 algorithm changes).
3731 3733 DELTAREUSEFULLADD
3732 3734 Revisions will be re-added as if they were new content. This is
3733 3735 slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
3734 3736 e.g. large file detection and handling.
3735 3737
3736 3738 Delta computation can be slow, so the choice of delta reuse policy can
3737 3739 significantly affect run time.
3738 3740
3739 3741 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
3740 3742 two extremes. Deltas will be reused if they are appropriate. But if the
3741 3743 delta could choose a better revision, it will do so. This means if you
3742 3744 are converting a non-generaldelta revlog to a generaldelta revlog,
3743 3745 deltas will be recomputed if the delta's parent isn't a parent of the
3744 3746 revision.
3745 3747
3746 3748 In addition to the delta policy, the ``forcedeltabothparents``
3747 3749 argument controls whether to force computing deltas against both parents
3748 3750 for merges. By default, the destination revlog's current setting is used.
3749 3751
3750 3752 See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
3751 3753 `sidedata_helpers`.
3752 3754 """
3753 3755 if deltareuse not in self.DELTAREUSEALL:
3754 3756 raise ValueError(
3755 3757 _(b'value for deltareuse invalid: %s') % deltareuse
3756 3758 )
3757 3759
3758 3760 if len(destrevlog):
3759 3761 raise ValueError(_(b'destination revlog is not empty'))
3760 3762
3761 3763 if getattr(self, 'filteredrevs', None):
3762 3764 raise ValueError(_(b'source revlog has filtered revisions'))
3763 3765 if getattr(destrevlog, 'filteredrevs', None):
3764 3766 raise ValueError(_(b'destination revlog has filtered revisions'))
3765 3767
3766 3768 # lazydelta and lazydeltabase controls whether to reuse a cached delta,
3767 3769 # if possible.
3768 3770 old_delta_config = destrevlog.delta_config
3769 3771 destrevlog.delta_config = destrevlog.delta_config.copy()
3770 3772
3771 3773 try:
3772 3774 if deltareuse == self.DELTAREUSEALWAYS:
3773 3775 destrevlog.delta_config.lazy_delta_base = True
3774 3776 destrevlog.delta_config.lazy_delta = True
3775 3777 elif deltareuse == self.DELTAREUSESAMEREVS:
3776 3778 destrevlog.delta_config.lazy_delta_base = False
3777 3779 destrevlog.delta_config.lazy_delta = True
3778 3780 elif deltareuse == self.DELTAREUSENEVER:
3779 3781 destrevlog.delta_config.lazy_delta_base = False
3780 3782 destrevlog.delta_config.lazy_delta = False
3781 3783
3782 3784 delta_both_parents = (
3783 3785 forcedeltabothparents or old_delta_config.delta_both_parents
3784 3786 )
3785 3787 destrevlog.delta_config.delta_both_parents = delta_both_parents
3786 3788
3787 3789 with self.reading(), destrevlog._writing(tr):
3788 3790 self._clone(
3789 3791 tr,
3790 3792 destrevlog,
3791 3793 addrevisioncb,
3792 3794 deltareuse,
3793 3795 forcedeltabothparents,
3794 3796 sidedata_helpers,
3795 3797 )
3796 3798
3797 3799 finally:
3798 3800 destrevlog.delta_config = old_delta_config
3799 3801
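A minimal usage sketch (not from the source), assuming `src` and `dest` are revlog instances for the same tracked file and `tr` is an open transaction; this forces every delta to be recomputed in the destination:

    src.clone(tr, dest, deltareuse=src.DELTAREUSENEVER)
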
3800 3802 def _clone(
3801 3803 self,
3802 3804 tr,
3803 3805 destrevlog,
3804 3806 addrevisioncb,
3805 3807 deltareuse,
3806 3808 forcedeltabothparents,
3807 3809 sidedata_helpers,
3808 3810 ):
3809 3811 """perform the core duty of `revlog.clone` after parameter processing"""
3810 3812 write_debug = None
3811 3813 if self.delta_config.debug_delta:
3812 3814 write_debug = tr._report
3813 3815 deltacomputer = deltautil.deltacomputer(
3814 3816 destrevlog,
3815 3817 write_debug=write_debug,
3816 3818 )
3817 3819 index = self.index
3818 3820 for rev in self:
3819 3821 entry = index[rev]
3820 3822
3821 3823 # Some classes override linkrev to take filtered revs into
3822 3824 # account. Use raw entry from index.
3823 3825 flags = entry[0] & 0xFFFF
3824 3826 linkrev = entry[4]
3825 3827 p1 = index[entry[5]][7]
3826 3828 p2 = index[entry[6]][7]
3827 3829 node = entry[7]
3828 3830
3829 3831 # (Possibly) reuse the delta from the revlog if allowed and
3830 3832 # the revlog chunk is a delta.
3831 3833 cachedelta = None
3832 3834 rawtext = None
3833 3835 if deltareuse == self.DELTAREUSEFULLADD:
3834 3836 text = self._revisiondata(rev)
3835 3837 sidedata = self.sidedata(rev)
3836 3838
3837 3839 if sidedata_helpers is not None:
3838 3840 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
3839 3841 self, sidedata_helpers, sidedata, rev
3840 3842 )
3841 3843 flags = flags | new_flags[0] & ~new_flags[1]
3842 3844
3843 3845 destrevlog.addrevision(
3844 3846 text,
3845 3847 tr,
3846 3848 linkrev,
3847 3849 p1,
3848 3850 p2,
3849 3851 cachedelta=cachedelta,
3850 3852 node=node,
3851 3853 flags=flags,
3852 3854 deltacomputer=deltacomputer,
3853 3855 sidedata=sidedata,
3854 3856 )
3855 3857 else:
3856 3858 if destrevlog.delta_config.lazy_delta:
3857 3859 dp = self.deltaparent(rev)
3858 3860 if dp != nullrev:
3859 3861 cachedelta = (dp, bytes(self._inner._chunk(rev)))
3860 3862
3861 3863 sidedata = None
3862 3864 if not cachedelta:
3863 3865 try:
3864 3866 rawtext = self._revisiondata(rev)
3865 3867 except error.CensoredNodeError as censored:
3866 3868 assert flags & REVIDX_ISCENSORED
3867 3869 rawtext = censored.tombstone
3868 3870 sidedata = self.sidedata(rev)
3869 3871 if sidedata is None:
3870 3872 sidedata = self.sidedata(rev)
3871 3873
3872 3874 if sidedata_helpers is not None:
3873 3875 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
3874 3876 self, sidedata_helpers, sidedata, rev
3875 3877 )
3876 3878 flags = flags | new_flags[0] & ~new_flags[1]
3877 3879
3878 3880 destrevlog._addrevision(
3879 3881 node,
3880 3882 rawtext,
3881 3883 tr,
3882 3884 linkrev,
3883 3885 p1,
3884 3886 p2,
3885 3887 flags,
3886 3888 cachedelta,
3887 3889 deltacomputer=deltacomputer,
3888 3890 sidedata=sidedata,
3889 3891 )
3890 3892
3891 3893 if addrevisioncb:
3892 3894 addrevisioncb(self, rev, node)
3893 3895
3894 3896 def censorrevision(self, tr, censor_nodes, tombstone=b''):
3895 3897 if self._format_version == REVLOGV0:
3896 3898 raise error.RevlogError(
3897 3899 _(b'cannot censor with version %d revlogs')
3898 3900 % self._format_version
3899 3901 )
3900 3902 elif self._format_version == REVLOGV1:
3901 3903 rewrite.v1_censor(self, tr, censor_nodes, tombstone)
3902 3904 else:
3903 3905 rewrite.v2_censor(self, tr, censor_nodes, tombstone)
3904 3906
3905 def verifyintegrity(self, state):
3907 def verifyintegrity(self, state) -> Iterable[RevLogProblem]:
3906 3908 """Verifies the integrity of the revlog.
3907 3909
3908 3910 Yields ``revlogproblem`` instances describing problems that are
3909 3911 found.
3910 3912 """
3911 3913 dd, di = self.checksize()
3912 3914 if dd:
3913 3915 yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
3914 3916 if di:
3915 3917 yield revlogproblem(error=_(b'index contains %d extra bytes') % di)
3916 3918
3917 3919 version = self._format_version
3918 3920
3919 3921 # The verifier tells us what version revlog we should be.
3920 3922 if version != state[b'expectedversion']:
3921 3923 yield revlogproblem(
3922 3924 warning=_(b"warning: '%s' uses revlog format %d; expected %d")
3923 3925 % (self.display_id, version, state[b'expectedversion'])
3924 3926 )
3925 3927
3926 3928 state[b'skipread'] = set()
3927 3929 state[b'safe_renamed'] = set()
3928 3930
3929 3931 for rev in self:
3930 3932 node = self.node(rev)
3931 3933
3932 3934 # Verify contents. 4 cases to care about:
3933 3935 #
3934 3936 # common: the most common case
3935 3937 # rename: with a rename
3936 3938 # meta: file content starts with b'\1\n', the metadata
3937 3939 # header defined in filelog.py, but without a rename
3938 3940 # ext: content stored externally
3939 3941 #
3940 3942 # More formally, their differences are shown below:
3941 3943 #
3942 3944 # | common | rename | meta | ext
3943 3945 # -------------------------------------------------------
3944 3946 # flags() | 0 | 0 | 0 | not 0
3945 3947 # renamed() | False | True | False | ?
3946 3948 # rawtext[0:2]=='\1\n'| False | True | True | ?
3947 3949 #
3948 3950 # "rawtext" means the raw text stored in revlog data, which
3949 3951 # could be retrieved by "rawdata(rev)". "text"
3950 3952 # mentioned below is "revision(rev)".
3951 3953 #
3952 3954 # There are 3 different lengths stored physically:
3953 3955 # 1. L1: rawsize, stored in revlog index
3954 3956 # 2. L2: len(rawtext), stored in revlog data
3955 3957 # 3. L3: len(text), stored in revlog data if flags==0, or
3956 3958 # possibly somewhere else if flags!=0
3957 3959 #
3958 3960 # L1 should be equal to L2. L3 could be different from them.
3959 3961 # "text" may or may not affect commit hash depending on flag
3960 3962 # processors (see flagutil.addflagprocessor).
3961 3963 #
3962 3964 # | common | rename | meta | ext
3963 3965 # -------------------------------------------------
3964 3966 # rawsize() | L1 | L1 | L1 | L1
3965 3967 # size() | L1 | L2-LM | L1(*) | L1 (?)
3966 3968 # len(rawtext) | L2 | L2 | L2 | L2
3967 3969 # len(text) | L2 | L2 | L2 | L3
3968 3970 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
3969 3971 #
3970 3972 # LM: length of metadata, depending on rawtext
3971 3973 # (*): not ideal, see comment in filelog.size
3972 3974 # (?): could be "- len(meta)" if the resolved content has
3973 3975 # rename metadata
3974 3976 #
3975 3977 # Checks needed to be done:
3976 3978 # 1. length check: L1 == L2, in all cases.
3977 3979 # 2. hash check: depending on flag processor, we may need to
3978 3980 # use either "text" (external), or "rawtext" (in revlog).
3979 3981
3980 3982 try:
3981 3983 skipflags = state.get(b'skipflags', 0)
3982 3984 if skipflags:
3983 3985 skipflags &= self.flags(rev)
3984 3986
3985 3987 _verify_revision(self, skipflags, state, node)
3986 3988
3987 3989 l1 = self.rawsize(rev)
3988 3990 l2 = len(self.rawdata(node))
3989 3991
3990 3992 if l1 != l2:
3991 3993 yield revlogproblem(
3992 3994 error=_(b'unpacked size is %d, %d expected') % (l2, l1),
3993 3995 node=node,
3994 3996 )
3995 3997
3996 3998 except error.CensoredNodeError:
3997 3999 if state[b'erroroncensored']:
3998 4000 yield revlogproblem(
3999 4001 error=_(b'censored file data'), node=node
4000 4002 )
4001 4003 state[b'skipread'].add(node)
4002 4004 except Exception as e:
4003 4005 yield revlogproblem(
4004 4006 error=_(b'unpacking %s: %s')
4005 4007 % (short(node), stringutil.forcebytestr(e)),
4006 4008 node=node,
4007 4009 )
4008 4010 state[b'skipread'].add(node)
4009 4011
4010 4012 def storageinfo(
4011 4013 self,
4012 4014 exclusivefiles=False,
4013 4015 sharedfiles=False,
4014 4016 revisionscount=False,
4015 4017 trackedsize=False,
4016 4018 storedsize=False,
4017 4019 ):
4018 4020 d = {}
4019 4021
4020 4022 if exclusivefiles:
4021 4023 d[b'exclusivefiles'] = [(self.opener, self._indexfile)]
4022 4024 if not self._inline:
4023 4025 d[b'exclusivefiles'].append((self.opener, self._datafile))
4024 4026
4025 4027 if sharedfiles:
4026 4028 d[b'sharedfiles'] = []
4027 4029
4028 4030 if revisionscount:
4029 4031 d[b'revisionscount'] = len(self)
4030 4032
4031 4033 if trackedsize:
4032 4034 d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))
4033 4035
4034 4036 if storedsize:
4035 4037 d[b'storedsize'] = sum(
4036 4038 self.opener.stat(path).st_size for path in self.files()
4037 4039 )
4038 4040
4039 4041 return d
4040 4042
4041 4043 def rewrite_sidedata(self, transaction, helpers, startrev, endrev):
4042 4044 if not self.feature_config.has_side_data:
4043 4045 return
4044 4046 # revlog formats with sidedata support do not support inline
4045 4047 assert not self._inline
4046 4048 if not helpers[1] and not helpers[2]:
4047 4049 # Nothing to generate or remove
4048 4050 return
4049 4051
4050 4052 new_entries = []
4051 4053 # append the new sidedata
4052 4054 with self._writing(transaction):
4053 4055 ifh, dfh, sdfh = self._inner._writinghandles
4054 4056 dfh.seek(self._docket.sidedata_end, os.SEEK_SET)
4055 4057
4056 4058 current_offset = sdfh.tell()
4057 4059 for rev in range(startrev, endrev + 1):
4058 4060 entry = self.index[rev]
4059 4061 new_sidedata, flags = sidedatautil.run_sidedata_helpers(
4060 4062 store=self,
4061 4063 sidedata_helpers=helpers,
4062 4064 sidedata={},
4063 4065 rev=rev,
4064 4066 )
4065 4067
4066 4068 serialized_sidedata = sidedatautil.serialize_sidedata(
4067 4069 new_sidedata
4068 4070 )
4069 4071
4070 4072 sidedata_compression_mode = COMP_MODE_INLINE
4071 4073 if serialized_sidedata and self.feature_config.has_side_data:
4072 4074 sidedata_compression_mode = COMP_MODE_PLAIN
4073 4075 h, comp_sidedata = self._inner.compress(serialized_sidedata)
4074 4076 if (
4075 4077 h != b'u'
4076 4078 and comp_sidedata[0] != b'\0'
4077 4079 and len(comp_sidedata) < len(serialized_sidedata)
4078 4080 ):
4079 4081 assert not h
4080 4082 if (
4081 4083 comp_sidedata[0]
4082 4084 == self._docket.default_compression_header
4083 4085 ):
4084 4086 sidedata_compression_mode = COMP_MODE_DEFAULT
4085 4087 serialized_sidedata = comp_sidedata
4086 4088 else:
4087 4089 sidedata_compression_mode = COMP_MODE_INLINE
4088 4090 serialized_sidedata = comp_sidedata
4089 4091 if entry[8] != 0 or entry[9] != 0:
4090 4092 # rewriting entries that already have sidedata is not
4091 4093 # supported yet, because it introduces garbage data in the
4092 4094 # revlog.
4093 4095 msg = b"rewriting existing sidedata is not supported yet"
4094 4096 raise error.Abort(msg)
4095 4097
4096 4098 # Apply (potential) flags to add and to remove after running
4097 4099 # the sidedata helpers
4098 4100 new_offset_flags = entry[0] | flags[0] & ~flags[1]
4099 4101 entry_update = (
4100 4102 current_offset,
4101 4103 len(serialized_sidedata),
4102 4104 new_offset_flags,
4103 4105 sidedata_compression_mode,
4104 4106 )
4105 4107
4106 4108 # the sidedata computation might have moved the file cursors around
4107 4109 sdfh.seek(current_offset, os.SEEK_SET)
4108 4110 sdfh.write(serialized_sidedata)
4109 4111 new_entries.append(entry_update)
4110 4112 current_offset += len(serialized_sidedata)
4111 4113 self._docket.sidedata_end = sdfh.tell()
4112 4114
4113 4115 # rewrite the new index entries
4114 4116 ifh.seek(startrev * self.index.entry_size)
4115 4117 for i, e in enumerate(new_entries):
4116 4118 rev = startrev + i
4117 4119 self.index.replace_sidedata_info(rev, *e)
4118 4120 packed = self.index.entry_binary(rev)
4119 4121 if rev == 0 and self._docket is None:
4120 4122 header = self._format_flags | self._format_version
4121 4123 header = self.index.pack_header(header)
4122 4124 packed = header + packed
4123 4125 ifh.write(packed)
@@ -1,271 +1,273 b''
1 1 # statichttprepo.py - simple http repository class for mercurial
2 2 #
3 3 # This provides read-only repo access to repositories exported via static http
4 4 #
5 5 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
6 6 #
7 7 # This software may be used and distributed according to the terms of the
8 8 # GNU General Public License version 2 or any later version.
9 9
10 10
11 11 import errno
12 12
13 13 from .i18n import _
14 14 from .node import sha1nodeconstants
15 15 from . import (
16 16 branchmap,
17 17 changelog,
18 18 error,
19 19 localrepo,
20 20 manifest,
21 21 namespaces,
22 22 pathutil,
23 23 pycompat,
24 24 requirements as requirementsmod,
25 25 url,
26 26 util,
27 27 vfs as vfsmod,
28 28 )
29 29 from .utils import (
30 30 urlutil,
31 31 )
32 32
33 33 urlerr = util.urlerr
34 34 urlreq = util.urlreq
35 35
36 36
37 37 class httprangereader:
38 38 def __init__(self, url, opener):
39 39 # we assume opener has HTTPRangeHandler
40 40 self.url = url
41 41 self.pos = 0
42 42 self.opener = opener
43 43 self.name = url
44 44
45 45 def __enter__(self):
46 46 return self
47 47
48 48 def __exit__(self, exc_type, exc_value, traceback):
49 49 self.close()
50 50
51 51 def seek(self, pos):
52 52 self.pos = pos
53 53
54 54 def read(self, bytes=None):
55 55 req = urlreq.request(pycompat.strurl(self.url))
56 56 end = b''
57 57 if bytes:
58 58 end = self.pos + bytes - 1
59 59 if self.pos or end:
60 60 req.add_header('Range', 'bytes=%d-%s' % (self.pos, end))
61 61
62 62 try:
63 63 f = self.opener.open(req)
64 64 data = f.read()
65 65 code = f.code
66 66 except urlerr.httperror as inst:
67 67 num = inst.code == 404 and errno.ENOENT or None
68 68 # Explicitly convert the exception to str, as Py3 would otherwise try
69 69 # to convert it to the local encoding and fail, since the HTTPResponse
70 70 # instance doesn't support encode.
71 71 raise IOError(num, str(inst))
72 72 except urlerr.urlerror as inst:
73 73 raise IOError(None, inst.reason)
74 74
75 75 if code == 200:
76 76 # HTTPRangeHandler does nothing if remote does not support
77 77 # Range headers and returns the full entity. Let's slice it.
78 78 if bytes:
79 79 data = data[self.pos : self.pos + bytes]
80 80 else:
81 81 data = data[self.pos :]
82 82 elif bytes:
83 83 data = data[:bytes]
84 84 self.pos += len(data)
85 85 return data
86 86
87 87 def readlines(self):
88 88 return self.read().splitlines(True)
89 89
90 90 def __iter__(self):
91 91 return iter(self.readlines())
92 92
93 93 def close(self):
94 94 pass
95 95
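As a rough standalone illustration of the Range-request pattern used by httprangereader.read above, using plain urllib.request and independent of Mercurial's opener machinery and its error translation:

import urllib.request

def read_range(url, pos, nbytes):
    """Fetch nbytes starting at pos, slicing locally if the server ignores
    the Range header and answers 200 with the full entity."""
    req = urllib.request.Request(url)
    req.add_header('Range', 'bytes=%d-%d' % (pos, pos + nbytes - 1))
    with urllib.request.urlopen(req) as resp:
        data = resp.read()
        if resp.status == 200:
            # server sent the whole body instead of a 206 partial response
            data = data[pos:pos + nbytes]
        return data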
96 96
97 97 # _RangeError and _HTTPRangeHandler were originally in byterange.py,
98 98 # which was itself extracted from urlgrabber. See the last version of
99 99 # byterange.py from history if you need more information.
100 100 class _RangeError(IOError):
101 101 """Error raised when an unsatisfiable range is requested."""
102 102
103 103
104 104 class _HTTPRangeHandler(urlreq.basehandler):
105 105 """Handler that enables HTTP Range headers.
106 106
107 107 This was extremely simple. The Range header is an HTTP feature to
108 108 begin with so all this class does is tell urllib2 that the
109 109 "206 Partial Content" response from the HTTP server is what we
110 110 expected.
111 111 """
112 112
113 113 def http_error_206(self, req, fp, code, msg, hdrs):
114 114 # 206 Partial Content Response
115 115 r = urlreq.addinfourl(fp, hdrs, req.get_full_url())
116 116 r.code = code
117 117 r.msg = msg
118 118 return r
119 119
120 120 def http_error_416(self, req, fp, code, msg, hdrs):
121 121 # HTTP's Range Not Satisfiable error
122 122 raise _RangeError('Requested Range Not Satisfiable')
123 123
124 124
125 125 def build_opener(ui, authinfo):
126 126 # urllib cannot handle URLs with embedded user or passwd
127 127 urlopener = url.opener(ui, authinfo)
128 128 urlopener.add_handler(_HTTPRangeHandler())
129 129
130 130 class statichttpvfs(vfsmod.abstractvfs):
131 131 def __init__(self, base):
132 132 self.base = base
133 133 self.options = {}
134 134
135 135 def __call__(self, path, mode=b'r', *args, **kw):
136 136 if mode not in (b'r', b'rb'):
137 137 raise IOError('Permission denied')
138 138 f = b"/".join((self.base, urlreq.quote(path)))
139 139 return httprangereader(f, urlopener)
140 140
141 141 def join(self, path, *insidef):
142 142 if path:
143 143 return pathutil.join(self.base, path, *insidef)
144 144 else:
145 145 return self.base
146 146
147 147 return statichttpvfs
148 148
149 149
150 150 class statichttppeer(localrepo.localpeer):
151 151 def local(self):
152 152 return None
153 153
154 154 def canpush(self):
155 155 return False
156 156
157 157
158 158 class statichttprepository(
159 159 localrepo.localrepository, localrepo.revlogfilestorage
160 160 ):
161 161 supported = localrepo.localrepository._basesupported
162 162
163 manifestlog: manifest.ManifestLog
164
163 165 def __init__(self, ui, path):
164 166 self._url = path
165 167 self.ui = ui
166 168
167 169 self.root = path
168 170 u = urlutil.url(path.rstrip(b'/') + b"/.hg")
169 171 self.path, authinfo = u.authinfo()
170 172
171 173 vfsclass = build_opener(ui, authinfo)
172 174 self.vfs = vfsclass(self.path)
173 175 self.cachevfs = vfsclass(self.vfs.join(b'cache'))
174 176 self._phasedefaults = []
175 177
176 178 self.names = namespaces.namespaces()
177 179 self.filtername = None
178 180 self._extrafilterid = None
179 181 self._wanted_sidedata = set()
180 182 self.features = set()
181 183
182 184 try:
183 185 requirements = set(self.vfs.read(b'requires').splitlines())
184 186 except FileNotFoundError:
185 187 requirements = set()
186 188
187 189 # check if it is a non-empty old-style repository
188 190 try:
189 191 fp = self.vfs(b"00changelog.i")
190 192 fp.read(1)
191 193 fp.close()
192 194 except FileNotFoundError:
193 195 # we do not care about empty old-style repositories here
194 196 msg = _(b"'%s' does not appear to be an hg repository") % path
195 197 raise error.RepoError(msg)
196 198 if requirementsmod.SHARESAFE_REQUIREMENT in requirements:
197 199 storevfs = vfsclass(self.vfs.join(b'store'))
198 200 requirements |= set(storevfs.read(b'requires').splitlines())
199 201
200 202 supportedrequirements = localrepo.gathersupportedrequirements(ui)
201 203 localrepo.ensurerequirementsrecognized(
202 204 requirements, supportedrequirements
203 205 )
204 206 localrepo.ensurerequirementscompatible(ui, requirements)
205 207 self.nodeconstants = sha1nodeconstants
206 208 self.nullid = self.nodeconstants.nullid
207 209
208 210 # setup store
209 211 self.store = localrepo.makestore(requirements, self.path, vfsclass)
210 212 self.spath = self.store.path
211 213 self.svfs = self.store.opener
212 214 self.sjoin = self.store.join
213 215 self._filecache = {}
214 216 self.requirements = requirements
215 217
216 218 rootmanifest = manifest.manifestrevlog(self.nodeconstants, self.svfs)
217 219 self.manifestlog = manifest.manifestlog(
218 220 self.svfs, self, rootmanifest, self.narrowmatch()
219 221 )
220 222 self.changelog = changelog.changelog(self.svfs)
221 223 self._tags = None
222 224 self.nodetagscache = None
223 225 self._branchcaches = branchmap.BranchMapCache()
224 226 self._revbranchcache = None
225 227 self.encodepats = None
226 228 self.decodepats = None
227 229 self._transref = None
228 230 self._dirstate = None
229 231
230 232 def _restrictcapabilities(self, caps):
231 233 caps = super(statichttprepository, self)._restrictcapabilities(caps)
232 234 return caps.difference([b"pushkey"])
233 235
234 236 def url(self):
235 237 return self._url
236 238
237 239 def local(self):
238 240 return False
239 241
240 242 def peer(self, path=None, remotehidden=False):
241 243 return statichttppeer(self, path=path, remotehidden=remotehidden)
242 244
243 245 def wlock(self, wait=True):
244 246 raise error.LockUnavailable(
245 247 0,
246 248 pycompat.sysstr(_(b'lock not available')),
247 249 b'lock',
248 250 _(b'cannot lock static-http repository'),
249 251 )
250 252
251 253 def lock(self, wait=True):
252 254 raise error.LockUnavailable(
253 255 0,
254 256 pycompat.sysstr(_(b'lock not available')),
255 257 b'lock',
256 258 _(b'cannot lock static-http repository'),
257 259 )
258 260
259 261 def _writecaches(self):
260 262 pass # statichttprepository are read only
261 263
262 264
263 265 def make_peer(
264 266 ui, path, create, intents=None, createopts=None, remotehidden=False
265 267 ):
266 268 if create:
267 269 raise error.Abort(_(b'cannot create new static-http repository'))
268 270 url = path.loc[7:]
269 271 return statichttprepository(ui, url).peer(
270 272 path=path, remotehidden=remotehidden
271 273 )
@@ -1,1250 +1,1250 b''
1 1 # store.py - repository store handling for Mercurial)
2 2 #
3 3 # Copyright 2008 Olivia Mackall <olivia@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 import collections
9 9 import functools
10 10 import os
11 11 import re
12 12 import stat
13 13 import typing
14 14
15 15 from typing import Generator, List
16 16
17 17 from .i18n import _
18 18 from .thirdparty import attr
19 19
20 20 # Force pytype to use the non-vendored package
21 21 if typing.TYPE_CHECKING:
22 22 # noinspection PyPackageRequirements
23 23 import attr
24 24
25 25 from .node import hex
26 26 from .revlogutils.constants import (
27 27 INDEX_HEADER,
28 28 KIND_CHANGELOG,
29 29 KIND_FILELOG,
30 30 KIND_MANIFESTLOG,
31 31 )
32 32 from . import (
33 33 changelog,
34 34 error,
35 35 filelog,
36 36 manifest,
37 37 policy,
38 38 pycompat,
39 39 revlog as revlogmod,
40 40 util,
41 41 vfs as vfsmod,
42 42 )
43 43 from .utils import hashutil
44 44
45 45 parsers = policy.importmod('parsers')
46 46 # how many bytes should be read from fncache in one read
47 47 # It is done to prevent loading large fncache files into memory
48 48 fncache_chunksize = 10**6
49 49
50 50
51 51 def _match_tracked_entry(entry: "BaseStoreEntry", matcher):
52 52 """parses a fncache entry and returns whether the entry is tracking a path
53 53 matched by matcher or not.
54 54
55 55 If matcher is None, returns True"""
56 56
57 57 if matcher is None:
58 58 return True
59 59
60 60 # TODO: make this safe for other entry types. Currently, the various
61 61 # store.data_entry generators only yield RevlogStoreEntry, so the
62 62 # attributes do exist on `entry`.
63 63 # pytype: disable=attribute-error
64 64 if entry.is_filelog:
65 65 return matcher(entry.target_id)
66 66 elif entry.is_manifestlog:
67 67 return matcher.visitdir(entry.target_id.rstrip(b'/'))
68 68 # pytype: enable=attribute-error
69 69 raise error.ProgrammingError(b"cannot process entry %r" % entry)
70 70
71 71
72 72 # This avoids a collision between a file named foo and a dir named
73 73 # foo.i or foo.d
74 74 def _encodedir(path):
75 75 """
76 76 >>> _encodedir(b'data/foo.i')
77 77 'data/foo.i'
78 78 >>> _encodedir(b'data/foo.i/bla.i')
79 79 'data/foo.i.hg/bla.i'
80 80 >>> _encodedir(b'data/foo.i.hg/bla.i')
81 81 'data/foo.i.hg.hg/bla.i'
82 82 >>> _encodedir(b'data/foo.i\\ndata/foo.i/bla.i\\ndata/foo.i.hg/bla.i\\n')
83 83 'data/foo.i\\ndata/foo.i.hg/bla.i\\ndata/foo.i.hg.hg/bla.i\\n'
84 84 """
85 85 return (
86 86 path.replace(b".hg/", b".hg.hg/")
87 87 .replace(b".i/", b".i.hg/")
88 88 .replace(b".d/", b".d.hg/")
89 89 )
90 90
91 91
92 92 encodedir = getattr(parsers, 'encodedir', _encodedir)
93 93
94 94
95 95 def decodedir(path):
96 96 """
97 97 >>> decodedir(b'data/foo.i')
98 98 'data/foo.i'
99 99 >>> decodedir(b'data/foo.i.hg/bla.i')
100 100 'data/foo.i/bla.i'
101 101 >>> decodedir(b'data/foo.i.hg.hg/bla.i')
102 102 'data/foo.i.hg/bla.i'
103 103 """
104 104 if b".hg/" not in path:
105 105 return path
106 106 return (
107 107 path.replace(b".d.hg/", b".d/")
108 108 .replace(b".i.hg/", b".i/")
109 109 .replace(b".hg.hg/", b".hg/")
110 110 )
111 111
112 112
113 113 def _reserved():
114 114 """characters that are problematic for filesystems
115 115
116 116 * ascii escapes (0..31)
117 117 * ascii hi (126..255)
118 118 * windows specials
119 119
120 120 these characters will be escaped by encodefunctions
121 121 """
122 122 winreserved = [ord(x) for x in u'\\:*?"<>|']
123 123 for x in range(32):
124 124 yield x
125 125 for x in range(126, 256):
126 126 yield x
127 127 for x in winreserved:
128 128 yield x
129 129
130 130
131 131 def _buildencodefun():
132 132 """
133 133 >>> enc, dec = _buildencodefun()
134 134
135 135 >>> enc(b'nothing/special.txt')
136 136 'nothing/special.txt'
137 137 >>> dec(b'nothing/special.txt')
138 138 'nothing/special.txt'
139 139
140 140 >>> enc(b'HELLO')
141 141 '_h_e_l_l_o'
142 142 >>> dec(b'_h_e_l_l_o')
143 143 'HELLO'
144 144
145 145 >>> enc(b'hello:world?')
146 146 'hello~3aworld~3f'
147 147 >>> dec(b'hello~3aworld~3f')
148 148 'hello:world?'
149 149
150 150 >>> enc(b'the\\x07quick\\xADshot')
151 151 'the~07quick~adshot'
152 152 >>> dec(b'the~07quick~adshot')
153 153 'the\\x07quick\\xadshot'
154 154 """
155 155 e = b'_'
156 156 xchr = pycompat.bytechr
157 157 asciistr = list(map(xchr, range(127)))
158 158 capitals = list(range(ord(b"A"), ord(b"Z") + 1))
159 159
160 160 cmap = {x: x for x in asciistr}
161 161 for x in _reserved():
162 162 cmap[xchr(x)] = b"~%02x" % x
163 163 for x in capitals + [ord(e)]:
164 164 cmap[xchr(x)] = e + xchr(x).lower()
165 165
166 166 dmap = {}
167 167 for k, v in cmap.items():
168 168 dmap[v] = k
169 169
170 170 def decode(s):
171 171 i = 0
172 172 while i < len(s):
173 173 for l in range(1, 4):
174 174 try:
175 175 yield dmap[s[i : i + l]]
176 176 i += l
177 177 break
178 178 except KeyError:
179 179 pass
180 180 else:
181 181 raise KeyError
182 182
183 183 return (
184 184 lambda s: b''.join([cmap[s[c : c + 1]] for c in range(len(s))]),
185 185 lambda s: b''.join(list(decode(s))),
186 186 )
187 187
188 188
189 189 _encodefname, _decodefname = _buildencodefun()
190 190
191 191
192 192 def encodefilename(s):
193 193 """
194 194 >>> encodefilename(b'foo.i/bar.d/bla.hg/hi:world?/HELLO')
195 195 'foo.i.hg/bar.d.hg/bla.hg.hg/hi~3aworld~3f/_h_e_l_l_o'
196 196 """
197 197 return _encodefname(encodedir(s))
198 198
199 199
200 200 def decodefilename(s):
201 201 """
202 202 >>> decodefilename(b'foo.i.hg/bar.d.hg/bla.hg.hg/hi~3aworld~3f/_h_e_l_l_o')
203 203 'foo.i/bar.d/bla.hg/hi:world?/HELLO'
204 204 """
205 205 return decodedir(_decodefname(s))
206 206
207 207
208 208 def _buildlowerencodefun():
209 209 """
210 210 >>> f = _buildlowerencodefun()
211 211 >>> f(b'nothing/special.txt')
212 212 'nothing/special.txt'
213 213 >>> f(b'HELLO')
214 214 'hello'
215 215 >>> f(b'hello:world?')
216 216 'hello~3aworld~3f'
217 217 >>> f(b'the\\x07quick\\xADshot')
218 218 'the~07quick~adshot'
219 219 """
220 220 xchr = pycompat.bytechr
221 221 cmap = {xchr(x): xchr(x) for x in range(127)}
222 222 for x in _reserved():
223 223 cmap[xchr(x)] = b"~%02x" % x
224 224 for x in range(ord(b"A"), ord(b"Z") + 1):
225 225 cmap[xchr(x)] = xchr(x).lower()
226 226
227 227 def lowerencode(s):
228 228 return b"".join([cmap[c] for c in pycompat.iterbytestr(s)])
229 229
230 230 return lowerencode
231 231
232 232
233 233 lowerencode = getattr(parsers, 'lowerencode', None) or _buildlowerencodefun()
234 234
235 235 # Windows reserved names: con, prn, aux, nul, com1..com9, lpt1..lpt9
236 236 _winres3 = (b'aux', b'con', b'prn', b'nul') # length 3
237 237 _winres4 = (b'com', b'lpt') # length 4 (with trailing 1..9)
238 238
239 239
240 240 def _auxencode(path, dotencode):
241 241 """
242 242 Encodes filenames containing names reserved by Windows or which end in
243 243 period or space. Does not touch other single reserved characters c.
244 244 Specifically, c in '\\:*?"<>|' or ord(c) <= 31 are *not* encoded here.
245 245 Additionally encodes space or period at the beginning, if dotencode is
246 246 True. Parameter path is assumed to be all lowercase.
247 247 A segment only needs encoding if a reserved name appears as a
248 248 basename (e.g. "aux", "aux.foo"). A directory or file named "foo.aux"
249 249 doesn't need encoding.
250 250
251 251 >>> s = b'.foo/aux.txt/txt.aux/con/prn/nul/foo.'
252 252 >>> _auxencode(s.split(b'/'), True)
253 253 ['~2efoo', 'au~78.txt', 'txt.aux', 'co~6e', 'pr~6e', 'nu~6c', 'foo~2e']
254 254 >>> s = b'.com1com2/lpt9.lpt4.lpt1/conprn/com0/lpt0/foo.'
255 255 >>> _auxencode(s.split(b'/'), False)
256 256 ['.com1com2', 'lp~749.lpt4.lpt1', 'conprn', 'com0', 'lpt0', 'foo~2e']
257 257 >>> _auxencode([b'foo. '], True)
258 258 ['foo.~20']
259 259 >>> _auxencode([b' .foo'], True)
260 260 ['~20.foo']
261 261 """
262 262 for i, n in enumerate(path):
263 263 if not n:
264 264 continue
265 265 if dotencode and n[0] in b'. ':
266 266 n = b"~%02x" % ord(n[0:1]) + n[1:]
267 267 path[i] = n
268 268 else:
269 269 l = n.find(b'.')
270 270 if l == -1:
271 271 l = len(n)
272 272 if (l == 3 and n[:3] in _winres3) or (
273 273 l == 4
274 274 and n[3:4] <= b'9'
275 275 and n[3:4] >= b'1'
276 276 and n[:3] in _winres4
277 277 ):
278 278 # encode third letter ('aux' -> 'au~78')
279 279 ec = b"~%02x" % ord(n[2:3])
280 280 n = n[0:2] + ec + n[3:]
281 281 path[i] = n
282 282 if n[-1] in b'. ':
283 283 # encode last period or space ('foo...' -> 'foo..~2e')
284 284 path[i] = n[:-1] + b"~%02x" % ord(n[-1:])
285 285 return path
286 286
287 287
288 288 _maxstorepathlen = 120
289 289 _dirprefixlen = 8
290 290 _maxshortdirslen = 8 * (_dirprefixlen + 1) - 4
291 291
292 292
293 293 def _hashencode(path, dotencode):
294 294 digest = hex(hashutil.sha1(path).digest())
295 295 le = lowerencode(path[5:]).split(b'/') # skips prefix 'data/' or 'meta/'
296 296 parts = _auxencode(le, dotencode)
297 297 basename = parts[-1]
298 298 _root, ext = os.path.splitext(basename)
299 299 sdirs = []
300 300 sdirslen = 0
301 301 for p in parts[:-1]:
302 302 d = p[:_dirprefixlen]
303 303 if d[-1] in b'. ':
304 304 # Windows can't access dirs ending in period or space
305 305 d = d[:-1] + b'_'
306 306 if sdirslen == 0:
307 307 t = len(d)
308 308 else:
309 309 t = sdirslen + 1 + len(d)
310 310 if t > _maxshortdirslen:
311 311 break
312 312 sdirs.append(d)
313 313 sdirslen = t
314 314 dirs = b'/'.join(sdirs)
315 315 if len(dirs) > 0:
316 316 dirs += b'/'
317 317 res = b'dh/' + dirs + digest + ext
318 318 spaceleft = _maxstorepathlen - len(res)
319 319 if spaceleft > 0:
320 320 filler = basename[:spaceleft]
321 321 res = b'dh/' + dirs + filler + digest + ext
322 322 return res
323 323
324 324
325 325 def _hybridencode(path, dotencode):
326 326 """encodes path with a length limit
327 327
328 328 Encodes all paths that begin with 'data/', according to the following.
329 329
330 330 Default encoding (reversible):
331 331
332 332 Encodes all uppercase letters 'X' as '_x'. All reserved or illegal
333 333 characters are encoded as '~xx', where xx is the two digit hex code
334 334 of the character (see encodefilename).
335 335 Relevant path components consisting of Windows reserved filenames are
336 336 masked by encoding the third character ('aux' -> 'au~78', see _auxencode).
337 337
338 338 Hashed encoding (not reversible):
339 339
340 340 If the default-encoded path is longer than _maxstorepathlen, a
341 341 non-reversible hybrid hashing of the path is done instead.
342 342 This encoding uses up to _dirprefixlen characters of all directory
343 343 levels of the lowerencoded path, but not more levels than can fit into
344 344 _maxshortdirslen.
345 345 Then follows the filler followed by the sha digest of the full path.
346 346 The filler is the beginning of the basename of the lowerencoded path
347 347 (the basename is everything after the last path separator). The filler
348 348 is as long as possible, filling in characters from the basename until
349 349 the encoded path has _maxstorepathlen characters (or all chars of the
350 350 basename have been taken).
351 351 The extension (e.g. '.i' or '.d') is preserved.
352 352
353 353 The string 'data/' at the beginning is replaced with 'dh/', if the hashed
354 354 encoding was used.
355 355 """
356 356 path = encodedir(path)
357 357 ef = _encodefname(path).split(b'/')
358 358 res = b'/'.join(_auxencode(ef, dotencode))
359 359 if len(res) > _maxstorepathlen:
360 360 res = _hashencode(path, dotencode)
361 361 return res
362 362
363 363
364 364 def _pathencode(path):
365 365 de = encodedir(path)
366 366 if len(path) > _maxstorepathlen:
367 367 return _hashencode(de, True)
368 368 ef = _encodefname(de).split(b'/')
369 369 res = b'/'.join(_auxencode(ef, True))
370 370 if len(res) > _maxstorepathlen:
371 371 return _hashencode(de, True)
372 372 return res
373 373
374 374
375 375 _pathencode = getattr(parsers, 'pathencode', _pathencode)
376 376
377 377
378 378 def _plainhybridencode(f):
379 379 return _hybridencode(f, False)
380 380
381 381
382 382 def _calcmode(vfs):
383 383 try:
384 384 # files in .hg/ will be created using this mode
385 385 mode = vfs.stat().st_mode
386 386 # avoid some useless chmods
387 387 if (0o777 & ~util.umask) == (0o777 & mode):
388 388 mode = None
389 389 except OSError:
390 390 mode = None
391 391 return mode
392 392
393 393
394 394 _data = [
395 395 b'bookmarks',
396 396 b'narrowspec',
397 397 b'data',
398 398 b'meta',
399 399 b'00manifest.d',
400 400 b'00manifest.i',
401 401 b'00changelog.d',
402 402 b'00changelog.i',
403 403 b'phaseroots',
404 404 b'obsstore',
405 405 b'requires',
406 406 ]
407 407
408 408 REVLOG_FILES_EXT = (
409 409 b'.i',
410 410 b'.idx',
411 411 b'.d',
412 412 b'.dat',
413 413 b'.n',
414 414 b'.nd',
415 415 b'.sda',
416 416 )
417 417 # file extension that also use a `-SOMELONGIDHASH.ext` form
418 418 REVLOG_FILES_LONG_EXT = (
419 419 b'.nd',
420 420 b'.idx',
421 421 b'.dat',
422 422 b'.sda',
423 423 )
424 424 # files that are "volatile" and might change between listing and streaming
425 425 #
426 426 # note: the ".nd" files are nodemap data and won't "change" but they might be
427 427 # deleted.
428 428 REVLOG_FILES_VOLATILE_EXT = (b'.n', b'.nd')
429 429
430 430 # some exception to the above matching
431 431 #
432 432 # XXX This is currently not in use because of issue6542
433 433 EXCLUDED = re.compile(br'.*undo\.[^/]+\.(nd?|i)$')
434 434
435 435
436 436 def is_revlog(f, kind, st):
437 437 if kind != stat.S_IFREG:
438 438 return False
439 439 if f.endswith(REVLOG_FILES_EXT):
440 440 return True
441 441 return False
442 442
443 443
444 444 def is_revlog_file(f):
445 445 if f.endswith(REVLOG_FILES_EXT):
446 446 return True
447 447 return False
448 448
449 449
450 450 @attr.s(slots=True)
451 451 class StoreFile:
452 452 """a file matching a store entry"""
453 453
454 454 unencoded_path = attr.ib()
455 455 _file_size = attr.ib(default=None)
456 456 is_volatile = attr.ib(default=False)
457 457
458 458 def file_size(self, vfs):
459 459 if self._file_size is None:
460 460 if vfs is None:
461 461 msg = b"calling vfs-less file_size without prior call: %s"
462 462 msg %= self.unencoded_path
463 463 raise error.ProgrammingError(msg)
464 464 try:
465 465 self._file_size = vfs.stat(self.unencoded_path).st_size
466 466 except FileNotFoundError:
467 467 self._file_size = 0
468 468 return self._file_size
469 469
470 470 @property
471 471 def has_size(self):
472 472 return self._file_size is not None
473 473
474 474 def get_stream(self, vfs, copies):
475 475 """return data "stream" information for this file
476 476
477 477 (unencoded_file_path, content_iterator, content_size)
478 478 """
479 479 size = self.file_size(None)
480 480
481 481 def get_stream():
482 482 actual_path = copies[vfs.join(self.unencoded_path)]
483 483 with open(actual_path, 'rb') as fp:
484 484 yield None # ready to stream
485 485 if size <= 65536:
486 486 yield fp.read(size)
487 487 else:
488 488 yield from util.filechunkiter(fp, limit=size)
489 489
490 490 s = get_stream()
491 491 next(s)
492 492 return (self.unencoded_path, s, size)
493 493
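The yield None followed by an immediate next(s) in get_stream above is a priming trick: the generator body runs just far enough to open the file right away, while the actual reads stay lazy. A standalone sketch of the same idiom (the helper name and arguments are illustrative only):

def primed_stream(path, size, chunk=65536):
    """Open `path` immediately, but yield its content lazily."""

    def gen():
        with open(path, 'rb') as fp:
            yield None  # "ready" marker: the file is open at this point
            remaining = size
            while remaining > 0:
                data = fp.read(min(chunk, remaining))
                if not data:
                    break
                remaining -= len(data)
                yield data

    s = gen()
    next(s)  # prime the generator: force the open() to happen now
    return s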
494 494
495 495 @attr.s(slots=True, init=False)
496 496 class BaseStoreEntry:
497 497 """An entry in the store
498 498
499 499 This is returned by `store.walk` and represents some data in the store."""
500 500
501 501 maybe_volatile = True
502 502
503 503 def files(self) -> List[StoreFile]:
504 504 raise NotImplementedError
505 505
506 506 def get_streams(
507 507 self,
508 508 repo=None,
509 509 vfs=None,
510 510 copies=None,
511 511 max_changeset=None,
512 512 preserve_file_count=False,
513 513 ):
514 514 """return a list of data streams associated with the files for this entry
515 515
516 516 return [(unencoded_file_path, content_iterator, content_size), …]
517 517 """
518 518 assert vfs is not None
519 519 return [f.get_stream(vfs, copies) for f in self.files()]
520 520
521 521
522 522 @attr.s(slots=True, init=False)
523 523 class SimpleStoreEntry(BaseStoreEntry):
524 524 """A generic entry in the store"""
525 525
526 526 is_revlog = False
527 527
528 528 maybe_volatile = attr.ib()
529 529 _entry_path = attr.ib()
530 530 _is_volatile = attr.ib(default=False)
531 531 _file_size = attr.ib(default=None)
532 532 _files = attr.ib(default=None)
533 533
534 534 def __init__(
535 535 self,
536 536 entry_path,
537 537 is_volatile=False,
538 538 file_size=None,
539 539 ):
540 540 super().__init__()
541 541 self._entry_path = entry_path
542 542 self._is_volatile = is_volatile
543 543 self._file_size = file_size
544 544 self._files = None
545 545 self.maybe_volatile = is_volatile
546 546
547 547 def files(self) -> List[StoreFile]:
548 548 if self._files is None:
549 549 self._files = [
550 550 StoreFile(
551 551 unencoded_path=self._entry_path,
552 552 file_size=self._file_size,
553 553 is_volatile=self._is_volatile,
554 554 )
555 555 ]
556 556 return self._files
557 557
558 558
559 559 @attr.s(slots=True, init=False)
560 560 class RevlogStoreEntry(BaseStoreEntry):
561 561 """A revlog entry in the store"""
562 562
563 563 is_revlog = True
564 564
565 565 revlog_type = attr.ib(default=None)
566 566 target_id = attr.ib(default=None)
567 567 maybe_volatile = attr.ib(default=True)
568 568 _path_prefix = attr.ib(default=None)
569 569 _details = attr.ib(default=None)
570 570 _files = attr.ib(default=None)
571 571
572 572 def __init__(
573 573 self,
574 574 revlog_type,
575 575 path_prefix,
576 576 target_id,
577 577 details,
578 578 ):
579 579 super().__init__()
580 580 self.revlog_type = revlog_type
581 581 self.target_id = target_id
582 582 self._path_prefix = path_prefix
583 583 assert b'.i' in details, (path_prefix, details)
584 584 for ext in details:
585 585 if ext.endswith(REVLOG_FILES_VOLATILE_EXT):
586 586 self.maybe_volatile = True
587 587 break
588 588 else:
589 589 self.maybe_volatile = False
590 590 self._details = details
591 591 self._files = None
592 592
593 593 @property
594 594 def is_changelog(self):
595 595 return self.revlog_type == KIND_CHANGELOG
596 596
597 597 @property
598 598 def is_manifestlog(self):
599 599 return self.revlog_type == KIND_MANIFESTLOG
600 600
601 601 @property
602 602 def is_filelog(self):
603 603 return self.revlog_type == KIND_FILELOG
604 604
605 605 def main_file_path(self):
606 606 """unencoded path of the main revlog file"""
607 607 return self._path_prefix + b'.i'
608 608
609 609 def files(self) -> List[StoreFile]:
610 610 if self._files is None:
611 611 self._files = []
612 612 for ext in sorted(self._details, key=_ext_key):
613 613 path = self._path_prefix + ext
614 614 file_size = self._details[ext]
615 615 # files that are "volatile" and might change between
616 616 # listing and streaming
617 617 #
618 618 # note: the ".nd" files are nodemap data and won't "change"
619 619 # but they might be deleted.
620 620 volatile = ext.endswith(REVLOG_FILES_VOLATILE_EXT)
621 621 f = StoreFile(path, file_size, volatile)
622 622 self._files.append(f)
623 623 return self._files
624 624
625 625 def get_streams(
626 626 self,
627 627 repo=None,
628 628 vfs=None,
629 629 copies=None,
630 630 max_changeset=None,
631 631 preserve_file_count=False,
632 632 ):
633 633 pre_sized = all(f.has_size for f in self.files())
634 634 if pre_sized and (
635 635 repo is None
636 636 or max_changeset is None
637 637 # This uses revlog-v2, ignore for now
638 638 or any(k.endswith(b'.idx') for k in self._details.keys())
639 639 # This is not inline, no race expected
640 640 or b'.d' in self._details
641 641 ):
642 642 return super().get_streams(
643 643 repo=repo,
644 644 vfs=vfs,
645 645 copies=copies,
646 646 max_changeset=max_changeset,
647 647 preserve_file_count=preserve_file_count,
648 648 )
649 649 elif not preserve_file_count:
650 650 stream = [
651 651 f.get_stream(vfs, copies)
652 652 for f in self.files()
653 653 if not f.unencoded_path.endswith((b'.i', b'.d'))
654 654 ]
655 655 rl = self.get_revlog_instance(repo).get_revlog()
656 656 rl_stream = rl.get_streams(max_changeset)
657 657 stream.extend(rl_stream)
658 658 return stream
659 659
660 660 name_to_size = {}
661 661 for f in self.files():
662 662 name_to_size[f.unencoded_path] = f.file_size(None)
663 663
664 664 stream = [
665 665 f.get_stream(vfs, copies)
666 666 for f in self.files()
667 667 if not f.unencoded_path.endswith(b'.i')
668 668 ]
669 669
670 670 index_path = self._path_prefix + b'.i'
671 671
672 672 index_file = None
673 673 try:
674 674 index_file = vfs(index_path)
675 675 header = index_file.read(INDEX_HEADER.size)
676 676 if revlogmod.revlog.is_inline_index(header):
677 677 size = name_to_size[index_path]
678 678
679 679 # no split underneath, just return the stream
680 680 def get_stream():
681 681 fp = index_file
682 682 try:
683 683 fp.seek(0)
684 684 yield None
685 685 if size <= 65536:
686 686 yield fp.read(size)
687 687 else:
688 688 yield from util.filechunkiter(fp, limit=size)
689 689 finally:
690 690 fp.close()
691 691
692 692 s = get_stream()
693 693 next(s)
694 694 index_file = None
695 695 stream.append((index_path, s, size))
696 696 else:
697 697 rl = self.get_revlog_instance(repo).get_revlog()
698 698 rl_stream = rl.get_streams(max_changeset, force_inline=True)
699 699 for name, s, size in rl_stream:
700 700 if name_to_size.get(name, 0) != size:
701 701 msg = _(b"expected %d bytes but %d provided for %s")
702 702 msg %= name_to_size.get(name, 0), size, name
703 703 raise error.Abort(msg)
704 704 stream.extend(rl_stream)
705 705 finally:
706 706 if index_file is not None:
707 707 index_file.close()
708 708
709 709 files = self.files()
710 710 assert len(stream) == len(files), (
711 711 stream,
712 712 files,
713 713 self._path_prefix,
714 714 self.target_id,
715 715 )
716 716 return stream
717 717
718 718 def get_revlog_instance(self, repo):
719 719 """Obtain a revlog instance from this store entry
720 720
721 721 An instance of the appropriate class is returned.
722 722 """
723 723 if self.is_changelog:
724 724 return changelog.changelog(repo.svfs)
725 725 elif self.is_manifestlog:
726 726 mandir = self.target_id
727 727 return manifest.manifestrevlog(
728 728 repo.nodeconstants, repo.svfs, tree=mandir
729 729 )
730 730 else:
731 731 return filelog.filelog(repo.svfs, self.target_id)
732 732
733 733
734 734 def _gather_revlog(files_data):
735 735 """group files per revlog prefix
736 736
737 737 This returns a two-level nested dict. The top level key is the revlog prefix
738 738 without extension; the second level maps each file "suffix" that was
739 739 seen for this revlog to arbitrary file data.
740 740 """
741 741 revlogs = collections.defaultdict(dict)
742 742 for u, value in files_data:
743 743 name, ext = _split_revlog_ext(u)
744 744 revlogs[name][ext] = value
745 745 return sorted(revlogs.items())
746 746
747 747
748 748 def _split_revlog_ext(filename):
749 749 """split the revlog file prefix from the variable extension"""
750 750 if filename.endswith(REVLOG_FILES_LONG_EXT):
751 751 char = b'-'
752 752 else:
753 753 char = b'.'
754 754 idx = filename.rfind(char)
755 755 return filename[:idx], filename[idx:]
756 756
757 757
758 758 def _ext_key(ext):
759 759 """a key to order revlog suffixes
760 760 
761 761 it is important to issue .i after the other entries."""
762 762 # the only important part of this order is to keep the `.i` last.
763 763 if ext.endswith(b'.n'):
764 764 return (0, ext)
765 765 elif ext.endswith(b'.nd'):
766 766 return (10, ext)
767 767 elif ext.endswith(b'.d'):
768 768 return (20, ext)
769 769 elif ext.endswith(b'.i'):
770 770 return (50, ext)
771 771 else:
772 772 return (40, ext)
773 773
774 774
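A toy restatement of how _split_revlog_ext and _gather_revlog above turn a flat list of store filenames into per-revlog groups; it is self-contained, so the long-extension tuple is repeated inline rather than imported:

import collections

LONG_EXT = (b'.nd', b'.idx', b'.dat', b'.sda')  # mirrors REVLOG_FILES_LONG_EXT

def gather(files_data):
    revlogs = collections.defaultdict(dict)
    for name, value in files_data:
        sep = b'-' if name.endswith(LONG_EXT) else b'.'
        idx = name.rfind(sep)
        revlogs[name[:idx]][name[idx:]] = value
    return sorted(revlogs.items())

files = [(b'data/foo.txt.i', 12), (b'data/foo.txt.d', 345)]
assert gather(files) == [(b'data/foo.txt', {b'.i': 12, b'.d': 345})]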
775 775 class basicstore:
776 776 '''base class for local repository stores'''
777 777
778 778 def __init__(self, path, vfstype):
779 779 vfs = vfstype(path)
780 780 self.path = vfs.base
781 781 self.createmode = _calcmode(vfs)
782 782 vfs.createmode = self.createmode
783 783 self.rawvfs = vfs
784 784 self.vfs = vfsmod.filtervfs(vfs, encodedir)
785 785 self.opener = self.vfs
786 786
787 787 def join(self, f):
788 788 return self.path + b'/' + encodedir(f)
789 789
790 790 def _walk(self, relpath, recurse, undecodable=None):
791 791 '''yields (revlog_type, unencoded, size)'''
792 792 path = self.path
793 793 if relpath:
794 794 path += b'/' + relpath
795 795 striplen = len(self.path) + 1
796 796 l = []
797 797 if self.rawvfs.isdir(path):
798 798 visit = [path]
799 799 readdir = self.rawvfs.readdir
800 800 while visit:
801 801 p = visit.pop()
802 802 for f, kind, st in readdir(p, stat=True):
803 803 fp = p + b'/' + f
804 804 if is_revlog(f, kind, st):
805 805 n = util.pconvert(fp[striplen:])
806 806 l.append((decodedir(n), st.st_size))
807 807 elif kind == stat.S_IFDIR and recurse:
808 808 visit.append(fp)
809 809
810 810 l.sort()
811 811 return l
812 812
813 813 def changelog(self, trypending, concurrencychecker=None):
814 814 return changelog.changelog(
815 815 self.vfs,
816 816 trypending=trypending,
817 817 concurrencychecker=concurrencychecker,
818 818 )
819 819
820 def manifestlog(self, repo, storenarrowmatch):
820 def manifestlog(self, repo, storenarrowmatch) -> manifest.ManifestLog:
821 821 rootstore = manifest.manifestrevlog(repo.nodeconstants, self.vfs)
822 822 return manifest.manifestlog(self.vfs, repo, rootstore, storenarrowmatch)
823 823
824 824 def data_entries(
825 825 self, matcher=None, undecodable=None
826 826 ) -> Generator[BaseStoreEntry, None, None]:
827 827 """Like walk, but excluding the changelog and root manifest.
828 828
829 829 When [undecodable] is None, revlog names that can't be
830 830 decoded cause an exception. When it is provided, it should
831 831 be a list and the filenames that can't be decoded are added
832 832 to it instead. This is very rarely needed."""
833 833 dirs = [
834 834 (b'data', KIND_FILELOG, False),
835 835 (b'meta', KIND_MANIFESTLOG, True),
836 836 ]
837 837 for base_dir, rl_type, strip_filename in dirs:
838 838 files = self._walk(base_dir, True, undecodable=undecodable)
839 839 for revlog, details in _gather_revlog(files):
840 840 revlog_target_id = revlog.split(b'/', 1)[1]
841 841 if strip_filename and b'/' in revlog:
842 842 revlog_target_id = revlog_target_id.rsplit(b'/', 1)[0]
843 843 revlog_target_id += b'/'
844 844 yield RevlogStoreEntry(
845 845 path_prefix=revlog,
846 846 revlog_type=rl_type,
847 847 target_id=revlog_target_id,
848 848 details=details,
849 849 )
850 850
851 851 def top_entries(
852 852 self, phase=False, obsolescence=False
853 853 ) -> Generator[BaseStoreEntry, None, None]:
854 854 if phase and self.vfs.exists(b'phaseroots'):
855 855 yield SimpleStoreEntry(
856 856 entry_path=b'phaseroots',
857 857 is_volatile=True,
858 858 )
859 859
860 860 if obsolescence and self.vfs.exists(b'obsstore'):
861 861 # XXX if we had the file size it could be non-volatile
862 862 yield SimpleStoreEntry(
863 863 entry_path=b'obsstore',
864 864 is_volatile=True,
865 865 )
866 866
867 867 files = reversed(self._walk(b'', False))
868 868
869 869 changelogs = collections.defaultdict(dict)
870 870 manifestlogs = collections.defaultdict(dict)
871 871
872 872 for u, s in files:
873 873 if u.startswith(b'00changelog'):
874 874 name, ext = _split_revlog_ext(u)
875 875 changelogs[name][ext] = s
876 876 elif u.startswith(b'00manifest'):
877 877 name, ext = _split_revlog_ext(u)
878 878 manifestlogs[name][ext] = s
879 879 else:
880 880 yield SimpleStoreEntry(
881 881 entry_path=u,
882 882 is_volatile=False,
883 883 file_size=s,
884 884 )
885 885 # yield manifest before changelog
886 886 top_rl = [
887 887 (manifestlogs, KIND_MANIFESTLOG),
888 888 (changelogs, KIND_CHANGELOG),
889 889 ]
890 890 assert len(manifestlogs) <= 1
891 891 assert len(changelogs) <= 1
892 892 for data, revlog_type in top_rl:
893 893 for revlog, details in sorted(data.items()):
894 894 yield RevlogStoreEntry(
895 895 path_prefix=revlog,
896 896 revlog_type=revlog_type,
897 897 target_id=b'',
898 898 details=details,
899 899 )
900 900
901 901 def walk(
902 902 self, matcher=None, phase=False, obsolescence=False
903 903 ) -> Generator[BaseStoreEntry, None, None]:
904 904 """return files related to data storage (ie: revlogs)
905 905
906 906 yields instances of BaseStoreEntry subclasses
907 907 
908 908 if a matcher is passed, only the storage files of tracked paths
909 909 that match the matcher are yielded
910 910 """
911 911 # yield data files first
912 912 for x in self.data_entries(matcher):
913 913 yield x
914 914 for x in self.top_entries(phase=phase, obsolescence=obsolescence):
915 915 yield x
916 916
917 917 def copylist(self):
918 918 return _data
919 919
920 920 def write(self, tr):
921 921 pass
922 922
923 923 def invalidatecaches(self):
924 924 pass
925 925
926 926 def markremoved(self, fn):
927 927 pass
928 928
929 929 def __contains__(self, path):
930 930 '''Checks if the store contains path'''
931 931 path = b"/".join((b"data", path))
932 932 # file?
933 933 if self.vfs.exists(path + b".i"):
934 934 return True
935 935 # dir?
936 936 if not path.endswith(b"/"):
937 937 path = path + b"/"
938 938 return self.vfs.exists(path)
939 939
940 940
941 941 class encodedstore(basicstore):
942 942 def __init__(self, path, vfstype):
943 943 vfs = vfstype(path + b'/store')
944 944 self.path = vfs.base
945 945 self.createmode = _calcmode(vfs)
946 946 vfs.createmode = self.createmode
947 947 self.rawvfs = vfs
948 948 self.vfs = vfsmod.filtervfs(vfs, encodefilename)
949 949 self.opener = self.vfs
950 950
951 951 def _walk(self, relpath, recurse, undecodable=None):
952 952 old = super()._walk(relpath, recurse)
953 953 new = []
954 954 for f1, value in old:
955 955 try:
956 956 f2 = decodefilename(f1)
957 957 except KeyError:
958 958 if undecodable is None:
959 959 msg = _(b'undecodable revlog name %s') % f1
960 960 raise error.StorageError(msg)
961 961 else:
962 962 undecodable.append(f1)
963 963 continue
964 964 new.append((f2, value))
965 965 return new
966 966
967 967 def data_entries(
968 968 self, matcher=None, undecodable=None
969 969 ) -> Generator[BaseStoreEntry, None, None]:
970 970 entries = super(encodedstore, self).data_entries(
971 971 undecodable=undecodable
972 972 )
973 973 for entry in entries:
974 974 if _match_tracked_entry(entry, matcher):
975 975 yield entry
976 976
977 977 def join(self, f):
978 978 return self.path + b'/' + encodefilename(f)
979 979
980 980 def copylist(self):
981 981 return [b'requires', b'00changelog.i'] + [b'store/' + f for f in _data]
982 982
983 983
984 984 class fncache:
985 985 # the filename used to be partially encoded
986 986 # hence the encodedir/decodedir dance
987 987 def __init__(self, vfs):
988 988 self.vfs = vfs
989 989 self._ignores = set()
990 990 self.entries = None
991 991 self._dirty = False
992 992 # set of new additions to fncache
993 993 self.addls = set()
994 994
995 995 def ensureloaded(self, warn=None):
996 996 """read the fncache file if not already read.
997 997
998 998 If the file on disk is corrupted, raise. If warn is provided,
999 999 warn and keep going instead."""
1000 1000 if self.entries is None:
1001 1001 self._load(warn)
1002 1002
1003 1003 def _load(self, warn=None):
1004 1004 '''fill the entries from the fncache file'''
1005 1005 self._dirty = False
1006 1006 try:
1007 1007 fp = self.vfs(b'fncache', mode=b'rb')
1008 1008 except IOError:
1009 1009 # skip nonexistent file
1010 1010 self.entries = set()
1011 1011 return
1012 1012
1013 1013 self.entries = set()
1014 1014 chunk = b''
1015 1015 for c in iter(functools.partial(fp.read, fncache_chunksize), b''):
1016 1016 chunk += c
1017 1017 try:
1018 1018 p = chunk.rindex(b'\n')
1019 1019 self.entries.update(decodedir(chunk[: p + 1]).splitlines())
1020 1020 chunk = chunk[p + 1 :]
1021 1021 except ValueError:
1022 1022 # substring '\n' not found, maybe the entry is bigger than the
1023 1023 # chunksize, so let's keep iterating
1024 1024 pass
1025 1025
1026 1026 if chunk:
1027 1027 msg = _(b"fncache does not end with a newline")
1028 1028 if warn:
1029 1029 warn(msg + b'\n')
1030 1030 else:
1031 1031 raise error.Abort(
1032 1032 msg,
1033 1033 hint=_(
1034 1034 b"use 'hg debugrebuildfncache' to "
1035 1035 b"rebuild the fncache"
1036 1036 ),
1037 1037 )
1038 1038 self._checkentries(fp, warn)
1039 1039 fp.close()
1040 1040
1041 1041 def _checkentries(self, fp, warn):
1042 1042 """make sure there is no empty string in entries"""
1043 1043 if b'' in self.entries:
1044 1044 fp.seek(0)
1045 1045 for n, line in enumerate(fp):
1046 1046 if not line.rstrip(b'\n'):
1047 1047 t = _(b'invalid entry in fncache, line %d') % (n + 1)
1048 1048 if warn:
1049 1049 warn(t + b'\n')
1050 1050 else:
1051 1051 raise error.Abort(t)
1052 1052
1053 1053 def write(self, tr):
1054 1054 if self._dirty:
1055 1055 assert self.entries is not None
1056 1056 self.entries = self.entries | self.addls
1057 1057 self.addls = set()
1058 1058 tr.addbackup(b'fncache')
1059 1059 fp = self.vfs(b'fncache', mode=b'wb', atomictemp=True)
1060 1060 if self.entries:
1061 1061 fp.write(encodedir(b'\n'.join(self.entries) + b'\n'))
1062 1062 fp.close()
1063 1063 self._dirty = False
1064 1064 if self.addls:
1065 1065 # if we have just new entries, let's append them to the fncache
1066 1066 tr.addbackup(b'fncache')
1067 1067 fp = self.vfs(b'fncache', mode=b'ab', atomictemp=True)
1068 1068 if self.addls:
1069 1069 fp.write(encodedir(b'\n'.join(self.addls) + b'\n'))
1070 1070 fp.close()
1071 1071 self.entries = None
1072 1072 self.addls = set()
1073 1073
1074 1074 def addignore(self, fn):
1075 1075 self._ignores.add(fn)
1076 1076
1077 1077 def add(self, fn):
1078 1078 if fn in self._ignores:
1079 1079 return
1080 1080 if self.entries is None:
1081 1081 self._load()
1082 1082 if fn not in self.entries:
1083 1083 self.addls.add(fn)
1084 1084
1085 1085 def remove(self, fn):
1086 1086 if self.entries is None:
1087 1087 self._load()
1088 1088 if fn in self.addls:
1089 1089 self.addls.remove(fn)
1090 1090 return
1091 1091 try:
1092 1092 self.entries.remove(fn)
1093 1093 self._dirty = True
1094 1094 except KeyError:
1095 1095 pass
1096 1096
1097 1097 def __contains__(self, fn):
1098 1098 if fn in self.addls:
1099 1099 return True
1100 1100 if self.entries is None:
1101 1101 self._load()
1102 1102 return fn in self.entries
1103 1103
1104 1104 def __iter__(self):
1105 1105 if self.entries is None:
1106 1106 self._load()
1107 1107 return iter(self.entries | self.addls)
1108 1108
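A standalone sketch of the chunked-decode loop used by fncache._load above: read fixed-size chunks, decode only up to the last complete line, and carry the partial tail into the next iteration. The helper name and the tiny chunk size are for illustration only:

import functools
import io

def read_entries(fp, chunksize=4):
    entries = set()
    chunk = b''
    for c in iter(functools.partial(fp.read, chunksize), b''):
        chunk += c
        try:
            p = chunk.rindex(b'\n')
            entries.update(chunk[: p + 1].splitlines())
            chunk = chunk[p + 1 :]
        except ValueError:
            # no newline yet: the current entry spans several chunks
            pass
    if chunk:
        raise ValueError('input does not end with a newline')
    return entries

assert read_entries(io.BytesIO(b'data/a.i\ndata/b.i\n')) == {b'data/a.i', b'data/b.i'}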
1109 1109
1110 1110 class _fncachevfs(vfsmod.proxyvfs):
1111 1111 def __init__(self, vfs, fnc, encode):
1112 1112 vfsmod.proxyvfs.__init__(self, vfs)
1113 1113 self.fncache = fnc
1114 1114 self.encode = encode
1115 1115
1116 1116 def __call__(self, path, mode=b'r', *args, **kw):
1117 1117 encoded = self.encode(path)
1118 1118 if (
1119 1119 mode not in (b'r', b'rb')
1120 1120 and (path.startswith(b'data/') or path.startswith(b'meta/'))
1121 1121 and is_revlog_file(path)
1122 1122 ):
1123 1123 # do not trigger a fncache load when adding a file that already is
1124 1124 # known to exist.
1125 1125 notload = self.fncache.entries is None and (
1126 1126 # if the file has size zero, it should be considered as missing.
1127 1127 # Such zero-size files are the result of truncation when a
1128 1128 # transaction is aborted.
1129 1129 self.vfs.exists(encoded)
1130 1130 and self.vfs.stat(encoded).st_size
1131 1131 )
1132 1132 if not notload:
1133 1133 self.fncache.add(path)
1134 1134 return self.vfs(encoded, mode, *args, **kw)
1135 1135
1136 1136 def join(self, path):
1137 1137 if path:
1138 1138 return self.vfs.join(self.encode(path))
1139 1139 else:
1140 1140 return self.vfs.join(path)
1141 1141
1142 1142 def register_file(self, path):
1143 1143 """generic hook point to let fncache steer its stew"""
1144 1144 if path.startswith(b'data/') or path.startswith(b'meta/'):
1145 1145 self.fncache.add(path)
1146 1146
1147 1147
1148 1148 class fncachestore(basicstore):
1149 1149 def __init__(self, path, vfstype, dotencode):
1150 1150 if dotencode:
1151 1151 encode = _pathencode
1152 1152 else:
1153 1153 encode = _plainhybridencode
1154 1154 self.encode = encode
1155 1155 vfs = vfstype(path + b'/store')
1156 1156 self.path = vfs.base
1157 1157 self.pathsep = self.path + b'/'
1158 1158 self.createmode = _calcmode(vfs)
1159 1159 vfs.createmode = self.createmode
1160 1160 self.rawvfs = vfs
1161 1161 fnc = fncache(vfs)
1162 1162 self.fncache = fnc
1163 1163 self.vfs = _fncachevfs(vfs, fnc, encode)
1164 1164 self.opener = self.vfs
1165 1165
1166 1166 def join(self, f):
1167 1167 return self.pathsep + self.encode(f)
1168 1168
1169 1169 def getsize(self, path):
1170 1170 return self.rawvfs.stat(path).st_size
1171 1171
1172 1172 def data_entries(
1173 1173 self, matcher=None, undecodable=None
1174 1174 ) -> Generator[BaseStoreEntry, None, None]:
1175 1175 # Note: all files in fncache should be revlog related; however, the
1176 1176 # fncache might contain such files added by previous versions of
1177 1177 # Mercurial.
1178 1178 files = ((f, None) for f in self.fncache if is_revlog_file(f))
1179 1179 by_revlog = _gather_revlog(files)
1180 1180 for revlog, details in by_revlog:
1181 1181 if revlog.startswith(b'data/'):
1182 1182 rl_type = KIND_FILELOG
1183 1183 revlog_target_id = revlog.split(b'/', 1)[1]
1184 1184 elif revlog.startswith(b'meta/'):
1185 1185 rl_type = KIND_MANIFESTLOG
1186 1186 # drop the initial directory and the `00manifest` file part
1187 1187 tmp = revlog.split(b'/', 1)[1]
1188 1188 revlog_target_id = tmp.rsplit(b'/', 1)[0] + b'/'
1189 1189 else:
1190 1190 # unreachable
1191 1191 assert False, revlog
1192 1192 entry = RevlogStoreEntry(
1193 1193 path_prefix=revlog,
1194 1194 revlog_type=rl_type,
1195 1195 target_id=revlog_target_id,
1196 1196 details=details,
1197 1197 )
1198 1198 if _match_tracked_entry(entry, matcher):
1199 1199 yield entry
1200 1200
1201 1201 def copylist(self):
1202 1202 d = (
1203 1203 b'bookmarks',
1204 1204 b'narrowspec',
1205 1205 b'data',
1206 1206 b'meta',
1207 1207 b'dh',
1208 1208 b'fncache',
1209 1209 b'phaseroots',
1210 1210 b'obsstore',
1211 1211 b'00manifest.d',
1212 1212 b'00manifest.i',
1213 1213 b'00changelog.d',
1214 1214 b'00changelog.i',
1215 1215 b'requires',
1216 1216 )
1217 1217 return [b'requires', b'00changelog.i'] + [b'store/' + f for f in d]
1218 1218
1219 1219 def write(self, tr):
1220 1220 self.fncache.write(tr)
1221 1221
1222 1222 def invalidatecaches(self):
1223 1223 self.fncache.entries = None
1224 1224 self.fncache.addls = set()
1225 1225
1226 1226 def markremoved(self, fn):
1227 1227 self.fncache.remove(fn)
1228 1228
1229 1229 def _exists(self, f):
1230 1230 ef = self.encode(f)
1231 1231 try:
1232 1232 self.getsize(ef)
1233 1233 return True
1234 1234 except FileNotFoundError:
1235 1235 return False
1236 1236
1237 1237 def __contains__(self, path):
1238 1238 '''Checks if the store contains path'''
1239 1239 path = b"/".join((b"data", path))
1240 1240 # check for files (exact match)
1241 1241 e = path + b'.i'
1242 1242 if e in self.fncache and self._exists(e):
1243 1243 return True
1244 1244 # now check for directories (prefix match)
1245 1245 if not path.endswith(b'/'):
1246 1246 path += b'/'
1247 1247 for e in self.fncache:
1248 1248 if e.startswith(path) and self._exists(e):
1249 1249 return True
1250 1250 return False
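The __contains__ check above is a two-step probe: first an exact hit on the filelog index, then a directory-prefix scan over the fncache entries. A toy, self-contained version of the same logic, where the exists callable stands in for the encoded-path stat:

def store_contains(fncache_entries, exists, path):
    path = b'/'.join((b'data', path))
    candidate = path + b'.i'
    if candidate in fncache_entries and exists(candidate):
        return True  # exact filelog match
    if not path.endswith(b'/'):
        path += b'/'
    # directory match: any existing entry under the prefix counts
    return any(e.startswith(path) and exists(e) for e in fncache_entries)

entries = {b'data/dir/file.txt.i'}
assert store_contains(entries, lambda e: True, b'dir/file.txt')
assert store_contains(entries, lambda e: True, b'dir')
assert not store_contains(entries, lambda e: True, b'other')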
@@ -1,344 +1,351 b''
1 1 # unionrepo.py - repository class for viewing union of repository changesets
2 2 #
3 3 # Derived from bundlerepo.py
4 4 # Copyright 2006, 2007 Benoit Boissinot <bboissin@gmail.com>
5 5 # Copyright 2013 Unity Technologies, Mads Kiilerich <madski@unity3d.com>
6 6 #
7 7 # This software may be used and distributed according to the terms of the
8 8 # GNU General Public License version 2 or any later version.
9 9
10 10 """Repository class for "in-memory pull" of one local repository to another,
11 11 allowing operations like diff and log with revsets.
12 12 """
13 13
14 14 import contextlib
15 15
16 16
17 17 from .i18n import _
18 18
19 19 from . import (
20 20 changelog,
21 21 cmdutil,
22 22 encoding,
23 23 error,
24 24 filelog,
25 25 localrepo,
26 26 manifest,
27 27 mdiff,
28 28 pathutil,
29 29 revlog,
30 30 util,
31 31 vfs as vfsmod,
32 32 )
33 33
34 34 from .revlogutils import (
35 35 constants as revlog_constants,
36 36 )
37 37
38 38
39 39 class unionrevlog(revlog.revlog):
40 40 def __init__(self, opener, radix, revlog2, linkmapper):
41 41 # How it works:
42 42 # To retrieve a revision, we just need to know the node id so we can
43 43 # look it up in revlog2.
44 44 #
45 45 # To differentiate a rev in the second revlog from a rev in the revlog,
46 46 # we check revision against repotiprev.
47 47 opener = vfsmod.readonlyvfs(opener)
48 48 target = getattr(revlog2, 'target', None)
49 49 if target is None:
50 50 # a revlog wrapper, eg: the manifestlog that is not an actual revlog
51 51 target = revlog2._revlog.target
52 52 revlog.revlog.__init__(self, opener, target=target, radix=radix)
53 53 self.revlog2 = revlog2
54 54
55 55 n = len(self)
56 56 self.repotiprev = n - 1
57 57 self.bundlerevs = set() # used by 'bundle()' revset expression
58 58 for rev2 in self.revlog2:
59 59 rev = self.revlog2.index[rev2]
60 60 # rev numbers - in revlog2, very different from self.rev
61 61 (
62 62 _start,
63 63 _csize,
64 64 rsize,
65 65 base,
66 66 linkrev,
67 67 p1rev,
68 68 p2rev,
69 69 node,
70 70 _sdo,
71 71 _sds,
72 72 _dcm,
73 73 _sdcm,
74 74 rank,
75 75 ) = rev
76 76 flags = _start & 0xFFFF
77 77
78 78 if linkmapper is None: # link is to same revlog
79 79 assert linkrev == rev2 # we never link back
80 80 link = n
81 81 else: # rev must be mapped from repo2 cl to unified cl by linkmapper
82 82 link = linkmapper(linkrev)
83 83
84 84 if linkmapper is not None: # link is to same revlog
85 85 base = linkmapper(base)
86 86
87 87 this_rev = self.index.get_rev(node)
88 88 if this_rev is not None:
89 89 # this happens for the common revlog revisions
90 90 self.bundlerevs.add(this_rev)
91 91 continue
92 92
93 93 p1node = self.revlog2.node(p1rev)
94 94 p2node = self.revlog2.node(p2rev)
95 95
96 96 # TODO: it's probably wrong to set compressed length to -1, but
97 97 # I have no idea if csize is valid in the base revlog context.
98 98 e = (
99 99 flags,
100 100 -1,
101 101 rsize,
102 102 base,
103 103 link,
104 104 self.rev(p1node),
105 105 self.rev(p2node),
106 106 node,
107 107 0, # sidedata offset
108 108 0, # sidedata size
109 109 revlog_constants.COMP_MODE_INLINE,
110 110 revlog_constants.COMP_MODE_INLINE,
111 111 rank,
112 112 )
113 113 self.index.append(e)
114 114 self.bundlerevs.add(n)
115 115 n += 1
116 116
117 117 @contextlib.contextmanager
118 118 def reading(self):
119 119 if 0 <= len(self.bundlerevs) < len(self.index):
120 120 read_1 = super().reading
121 121 else:
122 122 read_1 = util.nullcontextmanager
123 123 if 0 < len(self.bundlerevs):
124 124 read_2 = self.revlog2.reading
125 125 else:
126 126 read_2 = util.nullcontextmanager
127 127 with read_1(), read_2():
128 128 yield
129 129
130 130 def _chunk(self, rev):
131 131 if rev <= self.repotiprev:
132 132 return revlog.revlog._chunk(self, rev)
133 133 return self.revlog2._chunk(self.node(rev))
134 134
135 135 def revdiff(self, rev1, rev2):
136 136 """return or calculate a delta between two revisions"""
137 137 if rev1 > self.repotiprev and rev2 > self.repotiprev:
138 138 return self.revlog2.revdiff(
139 139 self.revlog2.rev(self.node(rev1)),
140 140 self.revlog2.rev(self.node(rev2)),
141 141 )
142 142 elif rev1 <= self.repotiprev and rev2 <= self.repotiprev:
143 143 return super(unionrevlog, self).revdiff(rev1, rev2)
144 144
145 145 return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))
146 146
147 147 def _revisiondata(self, nodeorrev, raw=False):
148 148 if isinstance(nodeorrev, int):
149 149 rev = nodeorrev
150 150 node = self.node(rev)
151 151 else:
152 152 node = nodeorrev
153 153 rev = self.rev(node)
154 154
155 155 if rev > self.repotiprev:
156 156 # work around manifestrevlog NOT being a revlog
157 157 revlog2 = getattr(self.revlog2, '_revlog', self.revlog2)
158 158 func = revlog2._revisiondata
159 159 else:
160 160 func = super(unionrevlog, self)._revisiondata
161 161 return func(node, raw=raw)
162 162
163 163 def addrevision(
164 164 self,
165 165 text,
166 166 transaction,
167 167 link,
168 168 p1,
169 169 p2,
170 170 cachedelta=None,
171 171 node=None,
172 172 flags=revlog.REVIDX_DEFAULT_FLAGS,
173 173 deltacomputer=None,
174 174 sidedata=None,
175 175 ):
176 176 raise NotImplementedError
177 177
178 178 def addgroup(
179 179 self,
180 180 deltas,
181 181 linkmapper,
182 182 transaction,
183 183 alwayscache=False,
184 184 addrevisioncb=None,
185 185 duplicaterevisioncb=None,
186 186 debug_info=None,
187 187 delta_base_reuse_policy=None,
188 188 ):
189 189 raise NotImplementedError
190 190
191 191 def strip(self, minlink, transaction):
192 192 raise NotImplementedError
193 193
194 194 def checksize(self):
195 195 raise NotImplementedError
196 196
197 197
198 198 class unionchangelog(unionrevlog, changelog.changelog):
199 199 def __init__(self, opener, opener2):
200 200 changelog.changelog.__init__(self, opener)
201 201 linkmapper = None
202 202 changelog2 = changelog.changelog(opener2)
203 203 unionrevlog.__init__(self, opener, self.radix, changelog2, linkmapper)
204 204
205 205
206 206 class unionmanifest(unionrevlog, manifest.manifestrevlog):
207 repotiprev: int
208 revlog2: manifest.ManifestRevlog
209
207 210 def __init__(self, nodeconstants, opener, opener2, linkmapper):
208 211 # XXX manifestrevlog is not actually a revlog, so mixing it with
209 212 # bundlerevlog is not a good idea.
210 213 manifest.manifestrevlog.__init__(self, nodeconstants, opener)
211 214 manifest2 = manifest.manifestrevlog(nodeconstants, opener2)
212 215 unionrevlog.__init__(
213 216 self, opener, self._revlog.radix, manifest2, linkmapper
214 217 )
215 218
216 219
217 220 class unionfilelog(filelog.filelog):
221 _revlog: unionrevlog
222 repotiprev: int
223 revlog2: revlog.revlog
224
218 225 def __init__(self, opener, path, opener2, linkmapper, repo):
219 226 filelog.filelog.__init__(self, opener, path)
220 227 filelog2 = filelog.filelog(opener2, path)
221 228 self._revlog = unionrevlog(
222 229 opener, self._revlog.radix, filelog2._revlog, linkmapper
223 230 )
224 231 self._repo = repo
225 232 self.repotiprev = self._revlog.repotiprev
226 233 self.revlog2 = self._revlog.revlog2
227 234
228 235 def iscensored(self, rev):
229 236 """Check if a revision is censored."""
230 237 if rev <= self.repotiprev:
231 238 return filelog.filelog.iscensored(self, rev)
232 239 node = self.node(rev)
233 240 return self.revlog2.iscensored(self.revlog2.rev(node))
234 241
235 242
236 243 class unionpeer(localrepo.localpeer):
237 244 def canpush(self):
238 245 return False
239 246
240 247
241 248 class unionrepository:
242 249 """Represents the union of data in 2 repositories.
243 250
244 251 Instances are not usable if constructed directly. Use ``instance()``
245 252 or ``makeunionrepository()`` to create a usable instance.
246 253 """
247 254
248 255 def __init__(self, repo2, url):
249 256 self.repo2 = repo2
250 257 self._url = url
251 258
252 259 self.ui.setconfig(b'phases', b'publish', False, b'unionrepo')
253 260
254 261 @localrepo.unfilteredpropertycache
255 262 def changelog(self):
256 263 return unionchangelog(self.svfs, self.repo2.svfs)
257 264
258 265 @localrepo.unfilteredpropertycache
259 266 def manifestlog(self):
260 267 rootstore = unionmanifest(
261 268 self.nodeconstants,
262 269 self.svfs,
263 270 self.repo2.svfs,
264 271 self.unfiltered()._clrev,
265 272 )
266 273 return manifest.manifestlog(
267 274 self.svfs, self, rootstore, self.narrowmatch()
268 275 )
269 276
270 277 def _clrev(self, rev2):
271 278 """map from repo2 changelog rev to temporary rev in self.changelog"""
272 279 node = self.repo2.changelog.node(rev2)
273 280 return self.changelog.rev(node)
274 281
275 282 def url(self):
276 283 return self._url
277 284
278 285 def file(self, f):
279 286 return unionfilelog(
280 287 self.svfs, f, self.repo2.svfs, self.unfiltered()._clrev, self
281 288 )
282 289
283 290 def close(self):
284 291 self.repo2.close()
285 292
286 293 def cancopy(self):
287 294 return False
288 295
289 296 def peer(self, path=None, remotehidden=False):
290 297 return unionpeer(self, path=None, remotehidden=remotehidden)
291 298
292 299 def getcwd(self):
293 300 return encoding.getcwd() # always outside the repo
294 301
295 302
296 303 def instance(ui, path, create, intents=None, createopts=None):
297 304 if create:
298 305 raise error.Abort(_(b'cannot create new union repository'))
299 306 parentpath = ui.config(b"bundle", b"mainreporoot")
300 307 if not parentpath:
301 308 # try to find the correct path to the working directory repo
302 309 parentpath = cmdutil.findrepo(encoding.getcwd())
303 310 if parentpath is None:
304 311 parentpath = b''
305 312 if parentpath:
306 313 # Try to make the full path relative so we get a nice, short URL.
307 314 # In particular, we don't want temp dir names in test outputs.
308 315 cwd = encoding.getcwd()
309 316 if parentpath == cwd:
310 317 parentpath = b''
311 318 else:
312 319 cwd = pathutil.normasprefix(cwd)
313 320 if parentpath.startswith(cwd):
314 321 parentpath = parentpath[len(cwd) :]
315 322 if path.startswith(b'union:'):
316 323 s = path.split(b":", 1)[1].split(b"+", 1)
317 324 if len(s) == 1:
318 325 repopath, repopath2 = parentpath, s[0]
319 326 else:
320 327 repopath, repopath2 = s
321 328 else:
322 329 repopath, repopath2 = parentpath, path
323 330
324 331 return makeunionrepository(ui, repopath, repopath2)
325 332
326 333
327 334 def makeunionrepository(ui, repopath1, repopath2):
328 335 """Make a union repository object from 2 local repo paths."""
329 336 repo1 = localrepo.instance(ui, repopath1, create=False)
330 337 repo2 = localrepo.instance(ui, repopath2, create=False)
331 338
332 339 url = b'union:%s+%s' % (
333 340 util.expandpath(repopath1),
334 341 util.expandpath(repopath2),
335 342 )
336 343
337 344 class derivedunionrepository(unionrepository, repo1.__class__):
338 345 pass
339 346
340 347 repo = repo1
341 348 repo.__class__ = derivedunionrepository
342 349 unionrepository.__init__(repo1, repo2, url)
343 350
344 351 return repo
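Putting the module together, a hedged usage sketch, assuming the standard union: scheme registration in mercurial.hg and with placeholder paths: open a union view of two local repositories and list the revisions present in the second one, which unionrevlog.__init__ records in bundlerevs for the bundle() revset.

from mercurial import hg, ui as uimod

ui = uimod.ui.load()
# both repository paths are placeholders
repo = hg.repository(ui, b'union:/path/to/repo1+/path/to/repo2')

# revisions present in the second repository are tracked in `bundlerevs`,
# so the bundle() revset selects them in the union numbering
for rev in repo.revs(b'bundle()'):
    ui.write(b'%d:%s\n' % (rev, repo[rev].hex()[:12]))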