Show More
This diff has been collapsed as it changes many lines, (589 lines changed) Show them Hide them | |||
@@ -0,0 +1,589 | |||
|
1 | # simplestorerepo.py - Extension that swaps in alternate repository storage. | |
|
2 | # | |
|
3 | # Copyright 2018 Gregory Szorc <gregory.szorc@gmail.com> | |
|
4 | # | |
|
5 | # This software may be used and distributed according to the terms of the | |
|
6 | # GNU General Public License version 2 or any later version. | |
|
7 | ||
|
8 | from __future__ import absolute_import | |
|
9 | ||
|
10 | from mercurial.i18n import _ | |
|
11 | from mercurial.node import ( | |
|
12 | bin, | |
|
13 | hex, | |
|
14 | nullid, | |
|
15 | nullrev, | |
|
16 | ) | |
|
17 | from mercurial.thirdparty import ( | |
|
18 | cbor, | |
|
19 | ) | |
|
20 | from mercurial import ( | |
|
21 | ancestor, | |
|
22 | error, | |
|
23 | filelog, | |
|
24 | mdiff, | |
|
25 | pycompat, | |
|
26 | revlog, | |
|
27 | ) | |
|
28 | ||
|
29 | # Note for extension authors: ONLY specify testedwith = 'ships-with-hg-core' for | |
|
30 | # extensions which SHIP WITH MERCURIAL. Non-mainline extensions should | |
|
31 | # be specifying the version(s) of Mercurial they are tested with, or | |
|
32 | # leave the attribute unspecified. | |
|
33 | testedwith = 'ships-with-hg-core' | |
|
34 | ||
|
35 | def validatenode(node): | |
|
36 | if isinstance(node, int): | |
|
37 | raise ValueError('expected node; got int') | |
|
38 | ||
|
39 | if len(node) != 20: | |
|
40 | raise ValueError('expected 20 byte node') | |
|
41 | ||
|
42 | def validaterev(rev): | |
|
43 | if not isinstance(rev, int): | |
|
44 | raise ValueError('expected int') | |
|
45 | ||
|
46 | class filestorage(object): | |
|
47 | """Implements storage for a tracked path. | |
|
48 | ||
|
49 | Data is stored in the VFS in a directory corresponding to the tracked | |
|
50 | path. | |
|
51 | ||
|
52 | Index data is stored in an ``index`` file using CBOR. | |
|
53 | ||
|
54 | Fulltext data is stored in files having names of the node. | |
|
55 | """ | |
|
56 | ||
|
57 | def __init__(self, svfs, path): | |
|
58 | self._svfs = svfs | |
|
59 | self._path = path | |
|
60 | ||
|
61 | self._storepath = b'/'.join([b'data', path]) | |
|
62 | self._indexpath = b'/'.join([self._storepath, b'index']) | |
|
63 | ||
|
64 | indexdata = self._svfs.tryread(self._indexpath) | |
|
65 | if indexdata: | |
|
66 | indexdata = cbor.loads(indexdata) | |
|
67 | ||
|
68 | self._indexdata = indexdata or [] | |
|
69 | self._indexbynode = {} | |
|
70 | self._indexbyrev = {} | |
|
71 | self.index = [] | |
|
72 | self._refreshindex() | |
|
73 | ||
|
74 | # This is used by changegroup code :/ | |
|
75 | self._generaldelta = True | |
|
76 | self.storedeltachains = False | |
|
77 | ||
|
78 | self.version = 1 | |
|
79 | ||
|
80 | def _refreshindex(self): | |
|
81 | self._indexbynode.clear() | |
|
82 | self._indexbyrev.clear() | |
|
83 | self.index = [] | |
|
84 | ||
|
85 | for i, entry in enumerate(self._indexdata): | |
|
86 | self._indexbynode[entry[b'node']] = entry | |
|
87 | self._indexbyrev[i] = entry | |
|
88 | ||
|
89 | self._indexbynode[nullid] = { | |
|
90 | b'node': nullid, | |
|
91 | b'p1': nullid, | |
|
92 | b'p2': nullid, | |
|
93 | b'linkrev': nullrev, | |
|
94 | b'flags': 0, | |
|
95 | } | |
|
96 | ||
|
97 | self._indexbyrev[nullrev] = { | |
|
98 | b'node': nullid, | |
|
99 | b'p1': nullid, | |
|
100 | b'p2': nullid, | |
|
101 | b'linkrev': nullrev, | |
|
102 | b'flags': 0, | |
|
103 | } | |
|
104 | ||
|
105 | for i, entry in enumerate(self._indexdata): | |
|
106 | p1rev, p2rev = self.parentrevs(self.rev(entry[b'node'])) | |
|
107 | ||
|
108 | # start, length, rawsize, chainbase, linkrev, p1, p2, node | |
|
109 | self.index.append((0, 0, 0, -1, entry[b'linkrev'], p1rev, p2rev, | |
|
110 | entry[b'node'])) | |
|
111 | ||
|
112 | self.index.append((0, 0, 0, -1, -1, -1, -1, nullid)) | |
|
113 | ||
|
114 | def __len__(self): | |
|
115 | return len(self._indexdata) | |
|
116 | ||
|
117 | def __iter__(self): | |
|
118 | return iter(range(len(self))) | |
|
119 | ||
|
120 | def revs(self, start=0, stop=None): | |
|
121 | step = 1 | |
|
122 | if stop is not None: | |
|
123 | if start > stop: | |
|
124 | step = -1 | |
|
125 | ||
|
126 | stop += step | |
|
127 | else: | |
|
128 | stop = len(self) | |
|
129 | ||
|
130 | return range(start, stop, step) | |
|
131 | ||
|
132 | def parents(self, node): | |
|
133 | validatenode(node) | |
|
134 | ||
|
135 | if node not in self._indexbynode: | |
|
136 | raise KeyError('unknown node') | |
|
137 | ||
|
138 | entry = self._indexbynode[node] | |
|
139 | ||
|
140 | return entry[b'p1'], entry[b'p2'] | |
|
141 | ||
|
142 | def parentrevs(self, rev): | |
|
143 | p1, p2 = self.parents(self._indexbyrev[rev][b'node']) | |
|
144 | return self.rev(p1), self.rev(p2) | |
|
145 | ||
|
146 | def rev(self, node): | |
|
147 | validatenode(node) | |
|
148 | ||
|
149 | # Will raise KeyError. | |
|
150 | self._indexbynode[node] | |
|
151 | ||
|
152 | for rev, entry in self._indexbyrev.items(): | |
|
153 | if entry[b'node'] == node: | |
|
154 | return rev | |
|
155 | ||
|
156 | raise error.ProgrammingError('this should not occur') | |
|
157 | ||
|
158 | def node(self, rev): | |
|
159 | validaterev(rev) | |
|
160 | ||
|
161 | return self._indexbyrev[rev][b'node'] | |
|
162 | ||
|
163 | def lookup(self, node): | |
|
164 | if isinstance(node, int): | |
|
165 | return self.node(node) | |
|
166 | ||
|
167 | if len(node) == 20: | |
|
168 | try: | |
|
169 | self.rev(node) | |
|
170 | return node | |
|
171 | except LookupError: | |
|
172 | pass | |
|
173 | ||
|
174 | try: | |
|
175 | rev = int(node) | |
|
176 | if '%d' % rev != node: | |
|
177 | raise ValueError | |
|
178 | ||
|
179 | if rev < 0: | |
|
180 | rev = len(self) + rev | |
|
181 | if rev < 0 or rev >= len(self): | |
|
182 | raise ValueError | |
|
183 | ||
|
184 | return self.node(rev) | |
|
185 | except (ValueError, OverflowError): | |
|
186 | pass | |
|
187 | ||
|
188 | if len(node) == 40: | |
|
189 | try: | |
|
190 | rawnode = bin(node) | |
|
191 | self.rev(rawnode) | |
|
192 | return rawnode | |
|
193 | except (TypeError, LookupError): | |
|
194 | pass | |
|
195 | ||
|
196 | raise LookupError(node, self._path, _('invalid lookup input')) | |
|
197 | ||
|
198 | def linkrev(self, rev): | |
|
199 | validaterev(rev) | |
|
200 | ||
|
201 | return self._indexbyrev[rev][b'linkrev'] | |
|
202 | ||
|
203 | def flags(self, rev): | |
|
204 | validaterev(rev) | |
|
205 | ||
|
206 | return self._indexbyrev[rev][b'flags'] | |
|
207 | ||
|
208 | def deltaparent(self, rev): | |
|
209 | validaterev(rev) | |
|
210 | ||
|
211 | p1node = self.parents(self.node(rev))[0] | |
|
212 | return self.rev(p1node) | |
|
213 | ||
|
214 | def candelta(self, baserev, rev): | |
|
215 | validaterev(baserev) | |
|
216 | validaterev(rev) | |
|
217 | ||
|
218 | if ((self.flags(baserev) & revlog.REVIDX_RAWTEXT_CHANGING_FLAGS) | |
|
219 | or (self.flags(rev) & revlog.REVIDX_RAWTEXT_CHANGING_FLAGS)): | |
|
220 | return False | |
|
221 | ||
|
222 | return True | |
|
223 | ||
|
224 | def rawsize(self, rev): | |
|
225 | validaterev(rev) | |
|
226 | node = self.node(rev) | |
|
227 | return len(self.revision(node, raw=True)) | |
|
228 | ||
|
229 | def _processflags(self, text, flags, operation, raw=False): | |
|
230 | if flags == 0: | |
|
231 | return text, True | |
|
232 | ||
|
233 | validatehash = True | |
|
234 | # Depending on the operation (read or write), the order might be | |
|
235 | # reversed due to non-commutative transforms. | |
|
236 | orderedflags = revlog.REVIDX_FLAGS_ORDER | |
|
237 | if operation == 'write': | |
|
238 | orderedflags = reversed(orderedflags) | |
|
239 | ||
|
240 | for flag in orderedflags: | |
|
241 | # If a flagprocessor has been registered for a known flag, apply the | |
|
242 | # related operation transform and update result tuple. | |
|
243 | if flag & flags: | |
|
244 | vhash = True | |
|
245 | ||
|
246 | if flag not in revlog._flagprocessors: | |
|
247 | message = _("missing processor for flag '%#x'") % (flag) | |
|
248 | raise revlog.RevlogError(message) | |
|
249 | ||
|
250 | processor = revlog._flagprocessors[flag] | |
|
251 | if processor is not None: | |
|
252 | readtransform, writetransform, rawtransform = processor | |
|
253 | ||
|
254 | if raw: | |
|
255 | vhash = rawtransform(self, text) | |
|
256 | elif operation == 'read': | |
|
257 | text, vhash = readtransform(self, text) | |
|
258 | else: # write operation | |
|
259 | text, vhash = writetransform(self, text) | |
|
260 | validatehash = validatehash and vhash | |
|
261 | ||
|
262 | return text, validatehash | |
|
263 | ||
|
264 | def checkhash(self, text, node, p1=None, p2=None, rev=None): | |
|
265 | if p1 is None and p2 is None: | |
|
266 | p1, p2 = self.parents(node) | |
|
267 | if node != revlog.hash(text, p1, p2): | |
|
268 | raise error.RevlogError(_("integrity check failed on %s") % | |
|
269 | self._path) | |
|
270 | ||
|
271 | def revision(self, node, raw=False): | |
|
272 | validatenode(node) | |
|
273 | ||
|
274 | if node == nullid: | |
|
275 | return b'' | |
|
276 | ||
|
277 | self._indexbynode[node] | |
|
278 | ||
|
279 | rev = self.rev(node) | |
|
280 | flags = self.flags(rev) | |
|
281 | ||
|
282 | path = b'/'.join([self._storepath, hex(node)]) | |
|
283 | rawtext = self._svfs.read(path) | |
|
284 | ||
|
285 | text, validatehash = self._processflags(rawtext, flags, 'read', raw=raw) | |
|
286 | if validatehash: | |
|
287 | self.checkhash(text, node, rev=rev) | |
|
288 | ||
|
289 | return text | |
|
290 | ||
|
291 | def read(self, node): | |
|
292 | validatenode(node) | |
|
293 | ||
|
294 | revision = self.revision(node) | |
|
295 | ||
|
296 | if not revision.startswith(b'\1\n'): | |
|
297 | return revision | |
|
298 | ||
|
299 | start = revision.index(b'\1\n', 2) | |
|
300 | return revision[start + 2:] | |
|
301 | ||
|
302 | def renamed(self, node): | |
|
303 | validatenode(node) | |
|
304 | ||
|
305 | if self.parents(node)[0] != nullid: | |
|
306 | return False | |
|
307 | ||
|
308 | fulltext = self.revision(node) | |
|
309 | m = filelog.parsemeta(fulltext)[0] | |
|
310 | ||
|
311 | if m and 'copy' in m: | |
|
312 | return m['copy'], bin(m['copyrev']) | |
|
313 | ||
|
314 | return False | |
|
315 | ||
|
316 | def cmp(self, node, text): | |
|
317 | validatenode(node) | |
|
318 | ||
|
319 | t = text | |
|
320 | ||
|
321 | if text.startswith(b'\1\n'): | |
|
322 | t = b'\1\n\1\n' + text | |
|
323 | ||
|
324 | p1, p2 = self.parents(node) | |
|
325 | ||
|
326 | if revlog.hash(t, p1, p2) == node: | |
|
327 | return False | |
|
328 | ||
|
329 | if self.iscensored(self.rev(node)): | |
|
330 | return text != b'' | |
|
331 | ||
|
332 | if self.renamed(node): | |
|
333 | t2 = self.read(node) | |
|
334 | return t2 != text | |
|
335 | ||
|
336 | return True | |
|
337 | ||
|
338 | def size(self, rev): | |
|
339 | validaterev(rev) | |
|
340 | ||
|
341 | node = self._indexbyrev[rev][b'node'] | |
|
342 | ||
|
343 | if self.renamed(node): | |
|
344 | return len(self.read(node)) | |
|
345 | ||
|
346 | if self.iscensored(rev): | |
|
347 | return 0 | |
|
348 | ||
|
349 | return len(self.revision(node)) | |
|
350 | ||
|
351 | def iscensored(self, rev): | |
|
352 | validaterev(rev) | |
|
353 | ||
|
354 | return self.flags(rev) & revlog.REVIDX_ISCENSORED | |
|
355 | ||
|
356 | def commonancestorsheads(self, a, b): | |
|
357 | validatenode(a) | |
|
358 | validatenode(b) | |
|
359 | ||
|
360 | a = self.rev(a) | |
|
361 | b = self.rev(b) | |
|
362 | ||
|
363 | ancestors = ancestor.commonancestorsheads(self.parentrevs, a, b) | |
|
364 | return pycompat.maplist(self.node, ancestors) | |
|
365 | ||
|
366 | def descendants(self, revs): | |
|
367 | # This is a copy of revlog.descendants() | |
|
368 | first = min(revs) | |
|
369 | if first == nullrev: | |
|
370 | for i in self: | |
|
371 | yield i | |
|
372 | return | |
|
373 | ||
|
374 | seen = set(revs) | |
|
375 | for i in self.revs(start=first + 1): | |
|
376 | for x in self.parentrevs(i): | |
|
377 | if x != nullrev and x in seen: | |
|
378 | seen.add(i) | |
|
379 | yield i | |
|
380 | break | |
|
381 | ||
|
382 | # Required by verify. | |
|
383 | def files(self): | |
|
384 | entries = self._svfs.listdir(self._storepath) | |
|
385 | ||
|
386 | # Strip out undo.backup.* files created as part of transaction | |
|
387 | # recording. | |
|
388 | entries = [f for f in entries if not f.startswith('undo.backup.')] | |
|
389 | ||
|
390 | return [b'/'.join((self._storepath, f)) for f in entries] | |
|
391 | ||
|
392 | # Required by verify. | |
|
393 | def checksize(self): | |
|
394 | return 0, 0 | |
|
395 | ||
|
396 | def add(self, text, meta, transaction, linkrev, p1, p2): | |
|
397 | if meta or text.startswith(b'\1\n'): | |
|
398 | text = filelog.packmeta(meta, text) | |
|
399 | ||
|
400 | return self.addrevision(text, transaction, linkrev, p1, p2) | |
|
401 | ||
|
402 | def addrevision(self, text, transaction, linkrev, p1, p2, node=None, | |
|
403 | flags=0): | |
|
404 | validatenode(p1) | |
|
405 | validatenode(p2) | |
|
406 | ||
|
407 | if flags: | |
|
408 | node = node or revlog.hash(text, p1, p2) | |
|
409 | ||
|
410 | rawtext, validatehash = self._processflags(text, flags, 'write') | |
|
411 | ||
|
412 | node = node or revlog.hash(text, p1, p2) | |
|
413 | ||
|
414 | if node in self._indexbynode: | |
|
415 | return node | |
|
416 | ||
|
417 | if validatehash: | |
|
418 | self.checkhash(rawtext, node, p1=p1, p2=p2) | |
|
419 | ||
|
420 | path = b'/'.join([self._storepath, hex(node)]) | |
|
421 | ||
|
422 | self._svfs.write(path, text) | |
|
423 | ||
|
424 | self._indexdata.append({ | |
|
425 | b'node': node, | |
|
426 | b'p1': p1, | |
|
427 | b'p2': p2, | |
|
428 | b'linkrev': linkrev, | |
|
429 | b'flags': flags, | |
|
430 | }) | |
|
431 | ||
|
432 | self._reflectindexupdate() | |
|
433 | ||
|
434 | return node | |
|
435 | ||
|
436 | def _reflectindexupdate(self): | |
|
437 | self._refreshindex() | |
|
438 | self._svfs.write(self._indexpath, cbor.dumps(self._indexdata)) | |
|
439 | ||
|
440 | def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None): | |
|
441 | nodes = [] | |
|
442 | ||
|
443 | transaction.addbackup(self._indexpath) | |
|
444 | ||
|
445 | for node, p1, p2, linknode, deltabase, delta, flags in deltas: | |
|
446 | linkrev = linkmapper(linknode) | |
|
447 | ||
|
448 | nodes.append(node) | |
|
449 | ||
|
450 | if node in self._indexbynode: | |
|
451 | continue | |
|
452 | ||
|
453 | # Need to resolve the fulltext from the delta base. | |
|
454 | if deltabase == nullid: | |
|
455 | text = mdiff.patch(b'', delta) | |
|
456 | else: | |
|
457 | text = mdiff.patch(self.revision(deltabase), delta) | |
|
458 | ||
|
459 | self.addrevision(text, transaction, linkrev, p1, p2, flags) | |
|
460 | ||
|
461 | if addrevisioncb: | |
|
462 | addrevisioncb(self, node) | |
|
463 | ||
|
464 | return nodes | |
|
465 | ||
|
466 | def revdiff(self, rev1, rev2): | |
|
467 | validaterev(rev1) | |
|
468 | validaterev(rev2) | |
|
469 | ||
|
470 | node1 = self.node(rev1) | |
|
471 | node2 = self.node(rev2) | |
|
472 | ||
|
473 | return mdiff.textdiff(self.revision(node1, raw=True), | |
|
474 | self.revision(node2, raw=True)) | |
|
475 | ||
|
476 | def headrevs(self): | |
|
477 | # Assume all revisions are heads by default. | |
|
478 | ishead = {rev: True for rev in self._indexbyrev} | |
|
479 | ||
|
480 | for rev, entry in self._indexbyrev.items(): | |
|
481 | # Unset head flag for all seen parents. | |
|
482 | ishead[self.rev(entry[b'p1'])] = False | |
|
483 | ishead[self.rev(entry[b'p2'])] = False | |
|
484 | ||
|
485 | return [rev for rev, ishead in sorted(ishead.items()) | |
|
486 | if ishead] | |
|
487 | ||
|
488 | def heads(self, start=None, stop=None): | |
|
489 | # This is copied from revlog.py. | |
|
490 | if start is None and stop is None: | |
|
491 | if not len(self): | |
|
492 | return [nullid] | |
|
493 | return [self.node(r) for r in self.headrevs()] | |
|
494 | ||
|
495 | if start is None: | |
|
496 | start = nullid | |
|
497 | if stop is None: | |
|
498 | stop = [] | |
|
499 | stoprevs = set([self.rev(n) for n in stop]) | |
|
500 | startrev = self.rev(start) | |
|
501 | reachable = {startrev} | |
|
502 | heads = {startrev} | |
|
503 | ||
|
504 | parentrevs = self.parentrevs | |
|
505 | for r in self.revs(start=startrev + 1): | |
|
506 | for p in parentrevs(r): | |
|
507 | if p in reachable: | |
|
508 | if r not in stoprevs: | |
|
509 | reachable.add(r) | |
|
510 | heads.add(r) | |
|
511 | if p in heads and p not in stoprevs: | |
|
512 | heads.remove(p) | |
|
513 | ||
|
514 | return [self.node(r) for r in heads] | |
|
515 | ||
|
516 | def children(self, node): | |
|
517 | validatenode(node) | |
|
518 | ||
|
519 | # This is a copy of revlog.children(). | |
|
520 | c = [] | |
|
521 | p = self.rev(node) | |
|
522 | for r in self.revs(start=p + 1): | |
|
523 | prevs = [pr for pr in self.parentrevs(r) if pr != nullrev] | |
|
524 | if prevs: | |
|
525 | for pr in prevs: | |
|
526 | if pr == p: | |
|
527 | c.append(self.node(r)) | |
|
528 | elif p == nullrev: | |
|
529 | c.append(self.node(r)) | |
|
530 | return c | |
|
531 | ||
|
532 | def getstrippoint(self, minlink): | |
|
533 | ||
|
534 | # This is largely a copy of revlog.getstrippoint(). | |
|
535 | brokenrevs = set() | |
|
536 | strippoint = len(self) | |
|
537 | ||
|
538 | heads = {} | |
|
539 | futurelargelinkrevs = set() | |
|
540 | for head in self.headrevs(): | |
|
541 | headlinkrev = self.linkrev(head) | |
|
542 | heads[head] = headlinkrev | |
|
543 | if headlinkrev >= minlink: | |
|
544 | futurelargelinkrevs.add(headlinkrev) | |
|
545 | ||
|
546 | # This algorithm involves walking down the rev graph, starting at the | |
|
547 | # heads. Since the revs are topologically sorted according to linkrev, | |
|
548 | # once all head linkrevs are below the minlink, we know there are | |
|
549 | # no more revs that could have a linkrev greater than minlink. | |
|
550 | # So we can stop walking. | |
|
551 | while futurelargelinkrevs: | |
|
552 | strippoint -= 1 | |
|
553 | linkrev = heads.pop(strippoint) | |
|
554 | ||
|
555 | if linkrev < minlink: | |
|
556 | brokenrevs.add(strippoint) | |
|
557 | else: | |
|
558 | futurelargelinkrevs.remove(linkrev) | |
|
559 | ||
|
560 | for p in self.parentrevs(strippoint): | |
|
561 | if p != nullrev: | |
|
562 | plinkrev = self.linkrev(p) | |
|
563 | heads[p] = plinkrev | |
|
564 | if plinkrev >= minlink: | |
|
565 | futurelargelinkrevs.add(plinkrev) | |
|
566 | ||
|
567 | return strippoint, brokenrevs | |
|
568 | ||
|
569 | def strip(self, minlink, transaction): | |
|
570 | if not len(self): | |
|
571 | return | |
|
572 | ||
|
573 | rev, _ignored = self.getstrippoint(minlink) | |
|
574 | if rev == len(self): | |
|
575 | return | |
|
576 | ||
|
577 | # Purge index data starting at the requested revision. | |
|
578 | self._indexdata[rev:] = [] | |
|
579 | self._reflectindexupdate() | |
|
580 | ||
|
581 | def reposetup(ui, repo): | |
|
582 | if not repo.local(): | |
|
583 | return | |
|
584 | ||
|
585 | class simplestorerepo(repo.__class__): | |
|
586 | def file(self, f): | |
|
587 | return filestorage(self.svfs, f) | |
|
588 | ||
|
589 | repo.__class__ = simplestorerepo |
General Comments 0
You need to be logged in to leave comments.
Login now