##// END OF EJS Templates
simplestore: shore up lookup errors...
Gregory Szorc -
r37426:dd275372 default
parent child Browse files
Show More
@@ -1,598 +1,595 b''
1 1 # simplestorerepo.py - Extension that swaps in alternate repository storage.
2 2 #
3 3 # Copyright 2018 Gregory Szorc <gregory.szorc@gmail.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 # To use this with the test suite:
9 9 #
10 10 # $ HGREPOFEATURES="simplestore" ./run-tests.py \
11 11 # --extra-config-opt extensions.simplestore=`pwd`/simplestorerepo.py
12 12
13 13 from __future__ import absolute_import
14 14
15 15 from mercurial.i18n import _
16 16 from mercurial.node import (
17 17 bin,
18 18 hex,
19 19 nullid,
20 20 nullrev,
21 21 )
22 22 from mercurial.thirdparty import (
23 23 cbor,
24 24 )
25 25 from mercurial import (
26 26 ancestor,
27 27 bundlerepo,
28 28 error,
29 29 filelog,
30 30 mdiff,
31 31 pycompat,
32 32 revlog,
33 33 )
34 34
35 35 # Note for extension authors: ONLY specify testedwith = 'ships-with-hg-core' for
36 36 # extensions which SHIP WITH MERCURIAL. Non-mainline extensions should
37 37 # be specifying the version(s) of Mercurial they are tested with, or
38 38 # leave the attribute unspecified.
39 39 testedwith = 'ships-with-hg-core'
40 40
41 41 def validatenode(node):
42 42 if isinstance(node, int):
43 43 raise ValueError('expected node; got int')
44 44
45 45 if len(node) != 20:
46 46 raise ValueError('expected 20 byte node')
47 47
48 48 def validaterev(rev):
49 49 if not isinstance(rev, int):
50 50 raise ValueError('expected int')
51 51
52 52 class filestorage(object):
53 53 """Implements storage for a tracked path.
54 54
55 55 Data is stored in the VFS in a directory corresponding to the tracked
56 56 path.
57 57
58 58 Index data is stored in an ``index`` file using CBOR.
59 59
60 60 Fulltext data is stored in files having names of the node.
61 61 """
62 62
63 63 def __init__(self, svfs, path):
64 64 self._svfs = svfs
65 65 self._path = path
66 66
67 67 self._storepath = b'/'.join([b'data', path])
68 68 self._indexpath = b'/'.join([self._storepath, b'index'])
69 69
70 70 indexdata = self._svfs.tryread(self._indexpath)
71 71 if indexdata:
72 72 indexdata = cbor.loads(indexdata)
73 73
74 74 self._indexdata = indexdata or []
75 75 self._indexbynode = {}
76 76 self._indexbyrev = {}
77 77 self.index = []
78 78 self._refreshindex()
79 79
80 80 # This is used by changegroup code :/
81 81 self._generaldelta = True
82 82 self.storedeltachains = False
83 83
84 84 self.version = 1
85 85
86 86 def _refreshindex(self):
87 87 self._indexbynode.clear()
88 88 self._indexbyrev.clear()
89 89 self.index = []
90 90
91 91 for i, entry in enumerate(self._indexdata):
92 92 self._indexbynode[entry[b'node']] = entry
93 93 self._indexbyrev[i] = entry
94 94
95 95 self._indexbynode[nullid] = {
96 96 b'node': nullid,
97 97 b'p1': nullid,
98 98 b'p2': nullid,
99 99 b'linkrev': nullrev,
100 100 b'flags': 0,
101 101 }
102 102
103 103 self._indexbyrev[nullrev] = {
104 104 b'node': nullid,
105 105 b'p1': nullid,
106 106 b'p2': nullid,
107 107 b'linkrev': nullrev,
108 108 b'flags': 0,
109 109 }
110 110
111 111 for i, entry in enumerate(self._indexdata):
112 112 p1rev, p2rev = self.parentrevs(self.rev(entry[b'node']))
113 113
114 114 # start, length, rawsize, chainbase, linkrev, p1, p2, node
115 115 self.index.append((0, 0, 0, -1, entry[b'linkrev'], p1rev, p2rev,
116 116 entry[b'node']))
117 117
118 118 self.index.append((0, 0, 0, -1, -1, -1, -1, nullid))
119 119
120 120 def __len__(self):
121 121 return len(self._indexdata)
122 122
123 123 def __iter__(self):
124 124 return iter(range(len(self)))
125 125
126 126 def revs(self, start=0, stop=None):
127 127 step = 1
128 128 if stop is not None:
129 129 if start > stop:
130 130 step = -1
131 131
132 132 stop += step
133 133 else:
134 134 stop = len(self)
135 135
136 136 return range(start, stop, step)
137 137
138 138 def parents(self, node):
139 139 validatenode(node)
140 140
141 141 if node not in self._indexbynode:
142 142 raise KeyError('unknown node')
143 143
144 144 entry = self._indexbynode[node]
145 145
146 146 return entry[b'p1'], entry[b'p2']
147 147
148 148 def parentrevs(self, rev):
149 149 p1, p2 = self.parents(self._indexbyrev[rev][b'node'])
150 150 return self.rev(p1), self.rev(p2)
151 151
152 152 def rev(self, node):
153 153 validatenode(node)
154 154
155 # Will raise KeyError.
156 self._indexbynode[node]
155 try:
156 self._indexbynode[node]
157 except KeyError:
158 raise error.LookupError(node, self._indexpath, _('no node'))
157 159
158 160 for rev, entry in self._indexbyrev.items():
159 161 if entry[b'node'] == node:
160 162 return rev
161 163
162 164 raise error.ProgrammingError('this should not occur')
163 165
164 166 def node(self, rev):
165 167 validaterev(rev)
166 168
167 169 return self._indexbyrev[rev][b'node']
168 170
169 171 def lookup(self, node):
170 172 if isinstance(node, int):
171 173 return self.node(node)
172 174
173 175 if len(node) == 20:
174 try:
175 self.rev(node)
176 return node
177 except LookupError:
178 pass
176 self.rev(node)
177 return node
179 178
180 179 try:
181 180 rev = int(node)
182 181 if '%d' % rev != node:
183 182 raise ValueError
184 183
185 184 if rev < 0:
186 185 rev = len(self) + rev
187 186 if rev < 0 or rev >= len(self):
188 187 raise ValueError
189 188
190 189 return self.node(rev)
191 190 except (ValueError, OverflowError):
192 191 pass
193 192
194 193 if len(node) == 40:
195 194 try:
196 195 rawnode = bin(node)
197 196 self.rev(rawnode)
198 197 return rawnode
199 except (TypeError, LookupError):
198 except TypeError:
200 199 pass
201 200
202 raise LookupError(node, self._path, _('invalid lookup input'))
201 raise error.LookupError(node, self._path, _('invalid lookup input'))
203 202
204 203 def linkrev(self, rev):
205 204 validaterev(rev)
206 205
207 206 return self._indexbyrev[rev][b'linkrev']
208 207
209 208 def flags(self, rev):
210 209 validaterev(rev)
211 210
212 211 return self._indexbyrev[rev][b'flags']
213 212
214 213 def deltaparent(self, rev):
215 214 validaterev(rev)
216 215
217 216 p1node = self.parents(self.node(rev))[0]
218 217 return self.rev(p1node)
219 218
220 219 def candelta(self, baserev, rev):
221 220 validaterev(baserev)
222 221 validaterev(rev)
223 222
224 223 if ((self.flags(baserev) & revlog.REVIDX_RAWTEXT_CHANGING_FLAGS)
225 224 or (self.flags(rev) & revlog.REVIDX_RAWTEXT_CHANGING_FLAGS)):
226 225 return False
227 226
228 227 return True
229 228
230 229 def rawsize(self, rev):
231 230 validaterev(rev)
232 231 node = self.node(rev)
233 232 return len(self.revision(node, raw=True))
234 233
235 234 def _processflags(self, text, flags, operation, raw=False):
236 235 if flags == 0:
237 236 return text, True
238 237
239 238 validatehash = True
240 239 # Depending on the operation (read or write), the order might be
241 240 # reversed due to non-commutative transforms.
242 241 orderedflags = revlog.REVIDX_FLAGS_ORDER
243 242 if operation == 'write':
244 243 orderedflags = reversed(orderedflags)
245 244
246 245 for flag in orderedflags:
247 246 # If a flagprocessor has been registered for a known flag, apply the
248 247 # related operation transform and update result tuple.
249 248 if flag & flags:
250 249 vhash = True
251 250
252 251 if flag not in revlog._flagprocessors:
253 252 message = _("missing processor for flag '%#x'") % (flag)
254 253 raise revlog.RevlogError(message)
255 254
256 255 processor = revlog._flagprocessors[flag]
257 256 if processor is not None:
258 257 readtransform, writetransform, rawtransform = processor
259 258
260 259 if raw:
261 260 vhash = rawtransform(self, text)
262 261 elif operation == 'read':
263 262 text, vhash = readtransform(self, text)
264 263 else: # write operation
265 264 text, vhash = writetransform(self, text)
266 265 validatehash = validatehash and vhash
267 266
268 267 return text, validatehash
269 268
270 269 def checkhash(self, text, node, p1=None, p2=None, rev=None):
271 270 if p1 is None and p2 is None:
272 271 p1, p2 = self.parents(node)
273 272 if node != revlog.hash(text, p1, p2):
274 273 raise error.RevlogError(_("integrity check failed on %s") %
275 274 self._path)
276 275
277 276 def revision(self, node, raw=False):
278 277 validatenode(node)
279 278
280 279 if node == nullid:
281 280 return b''
282 281
283 self._indexbynode[node]
284
285 282 rev = self.rev(node)
286 283 flags = self.flags(rev)
287 284
288 285 path = b'/'.join([self._storepath, hex(node)])
289 286 rawtext = self._svfs.read(path)
290 287
291 288 text, validatehash = self._processflags(rawtext, flags, 'read', raw=raw)
292 289 if validatehash:
293 290 self.checkhash(text, node, rev=rev)
294 291
295 292 return text
296 293
297 294 def read(self, node):
298 295 validatenode(node)
299 296
300 297 revision = self.revision(node)
301 298
302 299 if not revision.startswith(b'\1\n'):
303 300 return revision
304 301
305 302 start = revision.index(b'\1\n', 2)
306 303 return revision[start + 2:]
307 304
308 305 def renamed(self, node):
309 306 validatenode(node)
310 307
311 308 if self.parents(node)[0] != nullid:
312 309 return False
313 310
314 311 fulltext = self.revision(node)
315 312 m = filelog.parsemeta(fulltext)[0]
316 313
317 314 if m and 'copy' in m:
318 315 return m['copy'], bin(m['copyrev'])
319 316
320 317 return False
321 318
322 319 def cmp(self, node, text):
323 320 validatenode(node)
324 321
325 322 t = text
326 323
327 324 if text.startswith(b'\1\n'):
328 325 t = b'\1\n\1\n' + text
329 326
330 327 p1, p2 = self.parents(node)
331 328
332 329 if revlog.hash(t, p1, p2) == node:
333 330 return False
334 331
335 332 if self.iscensored(self.rev(node)):
336 333 return text != b''
337 334
338 335 if self.renamed(node):
339 336 t2 = self.read(node)
340 337 return t2 != text
341 338
342 339 return True
343 340
344 341 def size(self, rev):
345 342 validaterev(rev)
346 343
347 344 node = self._indexbyrev[rev][b'node']
348 345
349 346 if self.renamed(node):
350 347 return len(self.read(node))
351 348
352 349 if self.iscensored(rev):
353 350 return 0
354 351
355 352 return len(self.revision(node))
356 353
357 354 def iscensored(self, rev):
358 355 validaterev(rev)
359 356
360 357 return self.flags(rev) & revlog.REVIDX_ISCENSORED
361 358
362 359 def commonancestorsheads(self, a, b):
363 360 validatenode(a)
364 361 validatenode(b)
365 362
366 363 a = self.rev(a)
367 364 b = self.rev(b)
368 365
369 366 ancestors = ancestor.commonancestorsheads(self.parentrevs, a, b)
370 367 return pycompat.maplist(self.node, ancestors)
371 368
372 369 def descendants(self, revs):
373 370 # This is a copy of revlog.descendants()
374 371 first = min(revs)
375 372 if first == nullrev:
376 373 for i in self:
377 374 yield i
378 375 return
379 376
380 377 seen = set(revs)
381 378 for i in self.revs(start=first + 1):
382 379 for x in self.parentrevs(i):
383 380 if x != nullrev and x in seen:
384 381 seen.add(i)
385 382 yield i
386 383 break
387 384
388 385 # Required by verify.
389 386 def files(self):
390 387 entries = self._svfs.listdir(self._storepath)
391 388
392 389 # Strip out undo.backup.* files created as part of transaction
393 390 # recording.
394 391 entries = [f for f in entries if not f.startswith('undo.backup.')]
395 392
396 393 return [b'/'.join((self._storepath, f)) for f in entries]
397 394
398 395 # Required by verify.
399 396 def checksize(self):
400 397 return 0, 0
401 398
402 399 def add(self, text, meta, transaction, linkrev, p1, p2):
403 400 if meta or text.startswith(b'\1\n'):
404 401 text = filelog.packmeta(meta, text)
405 402
406 403 return self.addrevision(text, transaction, linkrev, p1, p2)
407 404
408 405 def addrevision(self, text, transaction, linkrev, p1, p2, node=None,
409 406 flags=0):
410 407 validatenode(p1)
411 408 validatenode(p2)
412 409
413 410 if flags:
414 411 node = node or revlog.hash(text, p1, p2)
415 412
416 413 rawtext, validatehash = self._processflags(text, flags, 'write')
417 414
418 415 node = node or revlog.hash(text, p1, p2)
419 416
420 417 if node in self._indexbynode:
421 418 return node
422 419
423 420 if validatehash:
424 421 self.checkhash(rawtext, node, p1=p1, p2=p2)
425 422
426 423 path = b'/'.join([self._storepath, hex(node)])
427 424
428 425 self._svfs.write(path, text)
429 426
430 427 self._indexdata.append({
431 428 b'node': node,
432 429 b'p1': p1,
433 430 b'p2': p2,
434 431 b'linkrev': linkrev,
435 432 b'flags': flags,
436 433 })
437 434
438 435 self._reflectindexupdate()
439 436
440 437 return node
441 438
442 439 def _reflectindexupdate(self):
443 440 self._refreshindex()
444 441 self._svfs.write(self._indexpath, cbor.dumps(self._indexdata))
445 442
446 443 def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
447 444 nodes = []
448 445
449 446 transaction.addbackup(self._indexpath)
450 447
451 448 for node, p1, p2, linknode, deltabase, delta, flags in deltas:
452 449 linkrev = linkmapper(linknode)
453 450
454 451 nodes.append(node)
455 452
456 453 if node in self._indexbynode:
457 454 continue
458 455
459 456 # Need to resolve the fulltext from the delta base.
460 457 if deltabase == nullid:
461 458 text = mdiff.patch(b'', delta)
462 459 else:
463 460 text = mdiff.patch(self.revision(deltabase), delta)
464 461
465 462 self.addrevision(text, transaction, linkrev, p1, p2, flags)
466 463
467 464 if addrevisioncb:
468 465 addrevisioncb(self, node)
469 466
470 467 return nodes
471 468
472 469 def revdiff(self, rev1, rev2):
473 470 validaterev(rev1)
474 471 validaterev(rev2)
475 472
476 473 node1 = self.node(rev1)
477 474 node2 = self.node(rev2)
478 475
479 476 return mdiff.textdiff(self.revision(node1, raw=True),
480 477 self.revision(node2, raw=True))
481 478
482 479 def headrevs(self):
483 480 # Assume all revisions are heads by default.
484 481 revishead = {rev: True for rev in self._indexbyrev}
485 482
486 483 for rev, entry in self._indexbyrev.items():
487 484 # Unset head flag for all seen parents.
488 485 revishead[self.rev(entry[b'p1'])] = False
489 486 revishead[self.rev(entry[b'p2'])] = False
490 487
491 488 return [rev for rev, ishead in sorted(revishead.items())
492 489 if ishead]
493 490
494 491 def heads(self, start=None, stop=None):
495 492 # This is copied from revlog.py.
496 493 if start is None and stop is None:
497 494 if not len(self):
498 495 return [nullid]
499 496 return [self.node(r) for r in self.headrevs()]
500 497
501 498 if start is None:
502 499 start = nullid
503 500 if stop is None:
504 501 stop = []
505 502 stoprevs = set([self.rev(n) for n in stop])
506 503 startrev = self.rev(start)
507 504 reachable = {startrev}
508 505 heads = {startrev}
509 506
510 507 parentrevs = self.parentrevs
511 508 for r in self.revs(start=startrev + 1):
512 509 for p in parentrevs(r):
513 510 if p in reachable:
514 511 if r not in stoprevs:
515 512 reachable.add(r)
516 513 heads.add(r)
517 514 if p in heads and p not in stoprevs:
518 515 heads.remove(p)
519 516
520 517 return [self.node(r) for r in heads]
521 518
522 519 def children(self, node):
523 520 validatenode(node)
524 521
525 522 # This is a copy of revlog.children().
526 523 c = []
527 524 p = self.rev(node)
528 525 for r in self.revs(start=p + 1):
529 526 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
530 527 if prevs:
531 528 for pr in prevs:
532 529 if pr == p:
533 530 c.append(self.node(r))
534 531 elif p == nullrev:
535 532 c.append(self.node(r))
536 533 return c
537 534
538 535 def getstrippoint(self, minlink):
539 536
540 537 # This is largely a copy of revlog.getstrippoint().
541 538 brokenrevs = set()
542 539 strippoint = len(self)
543 540
544 541 heads = {}
545 542 futurelargelinkrevs = set()
546 543 for head in self.headrevs():
547 544 headlinkrev = self.linkrev(head)
548 545 heads[head] = headlinkrev
549 546 if headlinkrev >= minlink:
550 547 futurelargelinkrevs.add(headlinkrev)
551 548
552 549 # This algorithm involves walking down the rev graph, starting at the
553 550 # heads. Since the revs are topologically sorted according to linkrev,
554 551 # once all head linkrevs are below the minlink, we know there are
555 552 # no more revs that could have a linkrev greater than minlink.
556 553 # So we can stop walking.
557 554 while futurelargelinkrevs:
558 555 strippoint -= 1
559 556 linkrev = heads.pop(strippoint)
560 557
561 558 if linkrev < minlink:
562 559 brokenrevs.add(strippoint)
563 560 else:
564 561 futurelargelinkrevs.remove(linkrev)
565 562
566 563 for p in self.parentrevs(strippoint):
567 564 if p != nullrev:
568 565 plinkrev = self.linkrev(p)
569 566 heads[p] = plinkrev
570 567 if plinkrev >= minlink:
571 568 futurelargelinkrevs.add(plinkrev)
572 569
573 570 return strippoint, brokenrevs
574 571
575 572 def strip(self, minlink, transaction):
576 573 if not len(self):
577 574 return
578 575
579 576 rev, _ignored = self.getstrippoint(minlink)
580 577 if rev == len(self):
581 578 return
582 579
583 580 # Purge index data starting at the requested revision.
584 581 self._indexdata[rev:] = []
585 582 self._reflectindexupdate()
586 583
587 584 def reposetup(ui, repo):
588 585 if not repo.local():
589 586 return
590 587
591 588 if isinstance(repo, bundlerepo.bundlerepository):
592 589 raise error.Abort(_('cannot use simple store with bundlerepo'))
593 590
594 591 class simplestorerepo(repo.__class__):
595 592 def file(self, f):
596 593 return filestorage(self.svfs, f)
597 594
598 595 repo.__class__ = simplestorerepo
General Comments 0
You need to be logged in to leave comments. Login now