verify: add check for mismatch of index and data length
Matt Mackall
r1493:1a216cb4 default
@@ -1,1736 +1,1746 @@ localrepo.py
1 1 # localrepo.py - read/write repository class for mercurial
2 2 #
3 3 # Copyright 2005 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms
6 6 # of the GNU General Public License, incorporated herein by reference.
7 7
8 8 import struct, os, util
9 9 import filelog, manifest, changelog, dirstate, repo
10 10 from node import *
11 11 from i18n import gettext as _
12 12 from demandload import *
13 13 demandload(globals(), "re lock transaction tempfile stat mdiff errno")
14 14
15 15 class localrepository:
16 16 def __init__(self, ui, path=None, create=0):
17 17 if not path:
18 18 p = os.getcwd()
19 19 while not os.path.isdir(os.path.join(p, ".hg")):
20 20 oldp = p
21 21 p = os.path.dirname(p)
22 22 if p == oldp: raise repo.RepoError(_("no repo found"))
23 23 path = p
24 24 self.path = os.path.join(path, ".hg")
25 25
26 26 if not create and not os.path.isdir(self.path):
27 27 raise repo.RepoError(_("repository %s not found") % self.path)
28 28
29 29 self.root = os.path.abspath(path)
30 30 self.ui = ui
31 31 self.opener = util.opener(self.path)
32 32 self.wopener = util.opener(self.root)
33 33 self.manifest = manifest.manifest(self.opener)
34 34 self.changelog = changelog.changelog(self.opener)
35 35 self.tagscache = None
36 36 self.nodetagscache = None
37 37 self.encodepats = None
38 38 self.decodepats = None
39 39
40 40 if create:
41 41 os.mkdir(self.path)
42 42 os.mkdir(self.join("data"))
43 43
44 44 self.dirstate = dirstate.dirstate(self.opener, ui, self.root)
45 45 try:
46 46 self.ui.readconfig(os.path.join(self.path, "hgrc"))
47 47 except IOError: pass
48 48
49 49 def hook(self, name, **args):
50 50 def runhook(name, cmd):
51 51 self.ui.note(_("running hook %s: %s\n") % (name, cmd))
52 52 old = {}
53 53 for k, v in args.items():
54 54 k = k.upper()
55 55 old[k] = os.environ.get(k, None)
56 56 os.environ[k] = v
57 57
58 58 # Hooks run in the repository root
59 59 olddir = os.getcwd()
60 60 os.chdir(self.root)
61 61 r = os.system(cmd)
62 62 os.chdir(olddir)
63 63
64 64 for k, v in old.items():
65 65 if v != None:
66 66 os.environ[k] = v
67 67 else:
68 68 del os.environ[k]
69 69
70 70 if r:
71 71 self.ui.warn(_("abort: %s hook failed with status %d!\n") %
72 72 (name, r))
73 73 return False
74 74 return True
75 75
76 76 r = True
77 77 for hname, cmd in self.ui.configitems("hooks"):
78 78 s = hname.split(".")
79 79 if s[0] == name and cmd:
80 80 r = runhook(hname, cmd) and r
81 81 return r
82 82
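
The hook machinery above turns each keyword argument into an environment variable, runs the command from the repository root through the shell, and treats a nonzero exit status as failure. A minimal standalone sketch of that convention (run_hook is a hypothetical helper, using subprocess in place of os.environ mutation and os.system):

import os, subprocess

def run_hook(cmd, root, **args):
    env = dict(os.environ)
    for k, v in args.items():
        env[k.upper()] = v            # e.g. node=... becomes $NODE
    # run from the repository root, inheriting the augmented environment
    r = subprocess.call(cmd, shell=True, cwd=root, env=env)
    return r == 0                     # nonzero exit status means failure

# Usage: a "commit" hook that echoes the new changeset id.
# run_hook('echo committed $NODE', '/path/to/repo', node='abc123')
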
83 83 def tags(self):
84 84 '''return a mapping of tag to node'''
85 85 if not self.tagscache:
86 86 self.tagscache = {}
87 87 def addtag(self, k, n):
88 88 try:
89 89 bin_n = bin(n)
90 90 except TypeError:
91 91 bin_n = ''
92 92 self.tagscache[k.strip()] = bin_n
93 93
94 94 try:
95 95 # read each head of the tags file, ending with the tip
96 96 # and add each tag found to the map, with "newer" ones
97 97 # taking precedence
98 98 fl = self.file(".hgtags")
99 99 h = fl.heads()
100 100 h.reverse()
101 101 for r in h:
102 102 for l in fl.read(r).splitlines():
103 103 if l:
104 104 n, k = l.split(" ", 1)
105 105 addtag(self, k, n)
106 106 except KeyError:
107 107 pass
108 108
109 109 try:
110 110 f = self.opener("localtags")
111 111 for l in f:
112 112 n, k = l.split(" ", 1)
113 113 addtag(self, k, n)
114 114 except IOError:
115 115 pass
116 116
117 117 self.tagscache['tip'] = self.changelog.tip()
118 118
119 119 return self.tagscache
120 120
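
tags() reads each head of the .hgtags filelog from oldest to newest, so entries on newer heads override older ones; localtags and the implicit 'tip' tag are layered on last. A small sketch of the line format it parses, "<40-char hex node> <tag name>" (parse_tag_lines is hypothetical; binascii stands in for bin() from node):

from binascii import unhexlify

def parse_tag_lines(lines):
    tags = {}
    for l in lines:
        if not l.strip():
            continue
        n, k = l.split(" ", 1)
        try:
            tags[k.strip()] = unhexlify(n)    # later lines override earlier ones
        except (TypeError, ValueError):
            tags[k.strip()] = ''              # bad node: same fallback as addtag()
    return tags

# parse_tag_lines(["0123456789abcdef0123456789abcdef01234567 v0.7\n"])
# -> {'v0.7': <20-byte binary node>}
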
121 121 def tagslist(self):
122 122 '''return a list of tags ordered by revision'''
123 123 l = []
124 124 for t, n in self.tags().items():
125 125 try:
126 126 r = self.changelog.rev(n)
127 127 except:
128 128 r = -2 # sort to the beginning of the list if unknown
129 129 l.append((r,t,n))
130 130 l.sort()
131 131 return [(t,n) for r,t,n in l]
132 132
133 133 def nodetags(self, node):
134 134 '''return the tags associated with a node'''
135 135 if not self.nodetagscache:
136 136 self.nodetagscache = {}
137 137 for t,n in self.tags().items():
138 138 self.nodetagscache.setdefault(n,[]).append(t)
139 139 return self.nodetagscache.get(node, [])
140 140
141 141 def lookup(self, key):
142 142 try:
143 143 return self.tags()[key]
144 144 except KeyError:
145 145 try:
146 146 return self.changelog.lookup(key)
147 147 except:
148 148 raise repo.RepoError(_("unknown revision '%s'") % key)
149 149
150 150 def dev(self):
151 151 return os.stat(self.path).st_dev
152 152
153 153 def local(self):
154 154 return True
155 155
156 156 def join(self, f):
157 157 return os.path.join(self.path, f)
158 158
159 159 def wjoin(self, f):
160 160 return os.path.join(self.root, f)
161 161
162 162 def file(self, f):
163 163 if f[0] == '/': f = f[1:]
164 164 return filelog.filelog(self.opener, f)
165 165
166 166 def getcwd(self):
167 167 return self.dirstate.getcwd()
168 168
169 169 def wfile(self, f, mode='r'):
170 170 return self.wopener(f, mode)
171 171
172 172 def wread(self, filename):
173 173 if self.encodepats == None:
174 174 l = []
175 175 for pat, cmd in self.ui.configitems("encode"):
176 176 mf = util.matcher("", "/", [pat], [], [])[1]
177 177 l.append((mf, cmd))
178 178 self.encodepats = l
179 179
180 180 data = self.wopener(filename, 'r').read()
181 181
182 182 for mf, cmd in self.encodepats:
183 183 if mf(filename):
184 184 self.ui.debug(_("filtering %s through %s\n") % (filename, cmd))
185 185 data = util.filter(data, cmd)
186 186 break
187 187
188 188 return data
189 189
190 190 def wwrite(self, filename, data, fd=None):
191 191 if self.decodepats == None:
192 192 l = []
193 193 for pat, cmd in self.ui.configitems("decode"):
194 194 mf = util.matcher("", "/", [pat], [], [])[1]
195 195 l.append((mf, cmd))
196 196 self.decodepats = l
197 197
198 198 for mf, cmd in self.decodepats:
199 199 if mf(filename):
200 200 self.ui.debug(_("filtering %s through %s\n") % (filename, cmd))
201 201 data = util.filter(data, cmd)
202 202 break
203 203
204 204 if fd:
205 205 return fd.write(data)
206 206 return self.wopener(filename, 'w').write(data)
207 207
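
wread() and wwrite() pipe file data through the first matching [encode] or [decode] filter command. A hedged sketch of that idea, assuming a simple fnmatch glob in place of util.matcher and subprocess in place of util.filter:

import fnmatch, subprocess

def filter_data(filename, data, patterns):
    # patterns: list of (glob, shell_cmd) pairs, e.g. [("*.txt", "tr a-z A-Z")]
    for pat, cmd in patterns:
        if fnmatch.fnmatch(filename, pat):
            # pipe the data through the shell command, like util.filter
            p = subprocess.run(cmd, shell=True, input=data,
                               stdout=subprocess.PIPE, check=True)
            return p.stdout
    return data                       # no pattern matched: pass through

# filter_data("notes.txt", b"hello", [("*.txt", "tr a-z A-Z")]) == b"HELLO"
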
208 208 def transaction(self):
209 209 # save dirstate for undo
210 210 try:
211 211 ds = self.opener("dirstate").read()
212 212 except IOError:
213 213 ds = ""
214 214 self.opener("journal.dirstate", "w").write(ds)
215 215
216 216 def after():
217 217 util.rename(self.join("journal"), self.join("undo"))
218 218 util.rename(self.join("journal.dirstate"),
219 219 self.join("undo.dirstate"))
220 220
221 221 return transaction.transaction(self.ui.warn, self.opener,
222 222 self.join("journal"), after)
223 223
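
transaction() snapshots the dirstate into journal.dirstate before anything is written; a clean close then renames the journal files to undo.*, which is exactly what undo() later restores. A minimal sketch of that rename dance (with_journal is a hypothetical helper):

import os, shutil

def with_journal(hgdir, run):
    src = os.path.join(hgdir, "dirstate")
    jnl = os.path.join(hgdir, "journal.dirstate")
    if os.path.exists(src):
        shutil.copyfile(src, jnl)     # save dirstate for undo
    else:
        open(jnl, "w").close()        # same as writing "" above
    run()                             # the transaction body
    # on success, journal.* becomes undo.*, ready for a one-step rollback
    os.rename(jnl, os.path.join(hgdir, "undo.dirstate"))
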
224 224 def recover(self):
225 225 lock = self.lock()
226 226 if os.path.exists(self.join("journal")):
227 227 self.ui.status(_("rolling back interrupted transaction\n"))
228 228 return transaction.rollback(self.opener, self.join("journal"))
229 229 else:
230 230 self.ui.warn(_("no interrupted transaction available\n"))
231 231
232 232 def undo(self):
233 233 lock = self.lock()
234 234 if os.path.exists(self.join("undo")):
235 235 self.ui.status(_("rolling back last transaction\n"))
236 236 transaction.rollback(self.opener, self.join("undo"))
237 237 self.dirstate = None
238 238 util.rename(self.join("undo.dirstate"), self.join("dirstate"))
239 239 self.dirstate = dirstate.dirstate(self.opener, self.ui, self.root)
240 240 else:
241 241 self.ui.warn(_("no undo information available\n"))
242 242
243 243 def lock(self, wait=1):
244 244 try:
245 245 return lock.lock(self.join("lock"), 0)
246 246 except lock.LockHeld, inst:
247 247 if wait:
248 248 self.ui.warn(_("waiting for lock held by %s\n") % inst.args[0])
249 249 return lock.lock(self.join("lock"), wait)
250 250 raise inst
251 251
252 252 def rawcommit(self, files, text, user, date, p1=None, p2=None):
253 253 orig_parent = self.dirstate.parents()[0] or nullid
254 254 p1 = p1 or self.dirstate.parents()[0] or nullid
255 255 p2 = p2 or self.dirstate.parents()[1] or nullid
256 256 c1 = self.changelog.read(p1)
257 257 c2 = self.changelog.read(p2)
258 258 m1 = self.manifest.read(c1[0])
259 259 mf1 = self.manifest.readflags(c1[0])
260 260 m2 = self.manifest.read(c2[0])
261 261 changed = []
262 262
263 263 if orig_parent == p1:
264 264 update_dirstate = 1
265 265 else:
266 266 update_dirstate = 0
267 267
268 268 tr = self.transaction()
269 269 mm = m1.copy()
270 270 mfm = mf1.copy()
271 271 linkrev = self.changelog.count()
272 272 for f in files:
273 273 try:
274 274 t = self.wread(f)
275 275 tm = util.is_exec(self.wjoin(f), mfm.get(f, False))
276 276 r = self.file(f)
277 277 mfm[f] = tm
278 278
279 279 fp1 = m1.get(f, nullid)
280 280 fp2 = m2.get(f, nullid)
281 281
282 282 # is the same revision on two branches of a merge?
283 283 if fp2 == fp1:
284 284 fp2 = nullid
285 285
286 286 if fp2 != nullid:
287 287 # is one parent an ancestor of the other?
288 288 fpa = r.ancestor(fp1, fp2)
289 289 if fpa == fp1:
290 290 fp1, fp2 = fp2, nullid
291 291 elif fpa == fp2:
292 292 fp2 = nullid
293 293
294 294 # is the file unmodified from the parent?
295 295 if t == r.read(fp1):
296 296 # record the proper existing parent in manifest
297 297 # no need to add a revision
298 298 mm[f] = fp1
299 299 continue
300 300
301 301 mm[f] = r.add(t, {}, tr, linkrev, fp1, fp2)
302 302 changed.append(f)
303 303 if update_dirstate:
304 304 self.dirstate.update([f], "n")
305 305 except IOError:
306 306 try:
307 307 del mm[f]
308 308 del mfm[f]
309 309 if update_dirstate:
310 310 self.dirstate.forget([f])
311 311 except:
312 312 # deleted from p2?
313 313 pass
314 314
315 315 mnode = self.manifest.add(mm, mfm, tr, linkrev, c1[0], c2[0])
316 316 user = user or self.ui.username()
317 317 n = self.changelog.add(mnode, changed, text, tr, p1, p2, user, date)
318 318 tr.close()
319 319 if update_dirstate:
320 320 self.dirstate.setparents(n, nullid)
321 321
322 322 def commit(self, files = None, text = "", user = None, date = None,
323 323 match = util.always, force=False):
324 324 commit = []
325 325 remove = []
326 326 changed = []
327 327
328 328 if files:
329 329 for f in files:
330 330 s = self.dirstate.state(f)
331 331 if s in 'nmai':
332 332 commit.append(f)
333 333 elif s == 'r':
334 334 remove.append(f)
335 335 else:
336 336 self.ui.warn(_("%s not tracked!\n") % f)
337 337 else:
338 338 (c, a, d, u) = self.changes(match=match)
339 339 commit = c + a
340 340 remove = d
341 341
342 342 p1, p2 = self.dirstate.parents()
343 343 c1 = self.changelog.read(p1)
344 344 c2 = self.changelog.read(p2)
345 345 m1 = self.manifest.read(c1[0])
346 346 mf1 = self.manifest.readflags(c1[0])
347 347 m2 = self.manifest.read(c2[0])
348 348
349 349 if not commit and not remove and not force and p2 == nullid:
350 350 self.ui.status(_("nothing changed\n"))
351 351 return None
352 352
353 353 if not self.hook("precommit"):
354 354 return None
355 355
356 356 lock = self.lock()
357 357 tr = self.transaction()
358 358
359 359 # check in files
360 360 new = {}
361 361 linkrev = self.changelog.count()
362 362 commit.sort()
363 363 for f in commit:
364 364 self.ui.note(f + "\n")
365 365 try:
366 366 mf1[f] = util.is_exec(self.wjoin(f), mf1.get(f, False))
367 367 t = self.wread(f)
368 368 except IOError:
369 369 self.ui.warn(_("trouble committing %s!\n") % f)
370 370 raise
371 371
372 372 r = self.file(f)
373 373
374 374 meta = {}
375 375 cp = self.dirstate.copied(f)
376 376 if cp:
377 377 meta["copy"] = cp
378 378 meta["copyrev"] = hex(m1.get(cp, m2.get(cp, nullid)))
379 379 self.ui.debug(_(" %s: copy %s:%s\n") % (f, cp, meta["copyrev"]))
380 380 fp1, fp2 = nullid, nullid
381 381 else:
382 382 fp1 = m1.get(f, nullid)
383 383 fp2 = m2.get(f, nullid)
384 384
385 385 # is the same revision on two branches of a merge?
386 386 if fp2 == fp1:
387 387 fp2 = nullid
388 388
389 389 if fp2 != nullid:
390 390 # is one parent an ancestor of the other?
391 391 fpa = r.ancestor(fp1, fp2)
392 392 if fpa == fp1:
393 393 fp1, fp2 = fp2, nullid
394 394 elif fpa == fp2:
395 395 fp2 = nullid
396 396
397 397 # is the file unmodified from the parent?
398 398 if not meta and t == r.read(fp1):
399 399 # record the proper existing parent in manifest
400 400 # no need to add a revision
401 401 new[f] = fp1
402 402 continue
403 403
404 404 new[f] = r.add(t, meta, tr, linkrev, fp1, fp2)
405 405 # remember what we've added so that we can later calculate
406 406 # the files to pull from a set of changesets
407 407 changed.append(f)
408 408
409 409 # update manifest
410 410 m1.update(new)
411 411 for f in remove:
412 412 if f in m1:
413 413 del m1[f]
414 414 mn = self.manifest.add(m1, mf1, tr, linkrev, c1[0], c2[0],
415 415 (new, remove))
416 416
417 417 # add changeset
418 418 new = new.keys()
419 419 new.sort()
420 420
421 421 if not text:
422 422 edittext = ""
423 423 if p2 != nullid:
424 424 edittext += "HG: branch merge\n"
425 425 edittext += "\n" + "HG: manifest hash %s\n" % hex(mn)
426 426 edittext += "".join(["HG: changed %s\n" % f for f in changed])
427 427 edittext += "".join(["HG: removed %s\n" % f for f in remove])
428 428 if not changed and not remove:
429 429 edittext += "HG: no files changed\n"
430 430 edittext = self.ui.edit(edittext)
431 431 if not edittext.rstrip():
432 432 return None
433 433 text = edittext
434 434
435 435 user = user or self.ui.username()
436 436 n = self.changelog.add(mn, changed, text, tr, p1, p2, user, date)
437 437 tr.close()
438 438
439 439 self.dirstate.setparents(n)
440 440 self.dirstate.update(new, "n")
441 441 self.dirstate.forget(remove)
442 442
443 443 if not self.hook("commit", node=hex(n)):
444 444 return None
445 445 return n
446 446
447 447 def walk(self, node=None, files=[], match=util.always):
448 448 if node:
449 449 for fn in self.manifest.read(self.changelog.read(node)[0]):
450 450 if match(fn): yield 'm', fn
451 451 else:
452 452 for src, fn in self.dirstate.walk(files, match):
453 453 yield src, fn
454 454
455 455 def changes(self, node1 = None, node2 = None, files = [],
456 456 match = util.always):
457 457 mf2, u = None, []
458 458
459 459 def fcmp(fn, mf):
460 460 t1 = self.wread(fn)
461 461 t2 = self.file(fn).read(mf.get(fn, nullid))
462 462 return cmp(t1, t2)
463 463
464 464 def mfmatches(node):
465 465 mf = dict(self.manifest.read(node))
466 466 for fn in mf.keys():
467 467 if not match(fn):
468 468 del mf[fn]
469 469 return mf
470 470
471 471 # are we comparing the working directory?
472 472 if not node2:
473 473 l, c, a, d, u = self.dirstate.changes(files, match)
474 474
475 475 # are we comparing working dir against its parent?
476 476 if not node1:
477 477 if l:
478 478 # do a full compare of any files that might have changed
479 479 change = self.changelog.read(self.dirstate.parents()[0])
480 480 mf2 = mfmatches(change[0])
481 481 for f in l:
482 482 if fcmp(f, mf2):
483 483 c.append(f)
484 484
485 485 for l in c, a, d, u:
486 486 l.sort()
487 487
488 488 return (c, a, d, u)
489 489
490 490 # are we comparing working dir against non-tip?
491 491 # generate a pseudo-manifest for the working dir
492 492 if not node2:
493 493 if not mf2:
494 494 change = self.changelog.read(self.dirstate.parents()[0])
495 495 mf2 = mfmatches(change[0])
496 496 for f in a + c + l:
497 497 mf2[f] = ""
498 498 for f in d:
499 499 if f in mf2: del mf2[f]
500 500 else:
501 501 change = self.changelog.read(node2)
502 502 mf2 = mfmatches(change[0])
503 503
504 504 # flush lists from dirstate before comparing manifests
505 505 c, a = [], []
506 506
507 507 change = self.changelog.read(node1)
508 508 mf1 = mfmatches(change[0])
509 509
510 510 for fn in mf2:
511 511 if mf1.has_key(fn):
512 512 if mf1[fn] != mf2[fn]:
513 513 if mf2[fn] != "" or fcmp(fn, mf1):
514 514 c.append(fn)
515 515 del mf1[fn]
516 516 else:
517 517 a.append(fn)
518 518
519 519 d = mf1.keys()
520 520
521 521 for l in c, a, d, u:
522 522 l.sort()
523 523
524 524 return (c, a, d, u)
525 525
526 526 def add(self, list):
527 527 for f in list:
528 528 p = self.wjoin(f)
529 529 if not os.path.exists(p):
530 530 self.ui.warn(_("%s does not exist!\n") % f)
531 531 elif not os.path.isfile(p):
532 532 self.ui.warn(_("%s not added: only files supported currently\n") % f)
533 533 elif self.dirstate.state(f) in 'an':
534 534 self.ui.warn(_("%s already tracked!\n") % f)
535 535 else:
536 536 self.dirstate.update([f], "a")
537 537
538 538 def forget(self, list):
539 539 for f in list:
540 540 if self.dirstate.state(f) not in 'ai':
541 541 self.ui.warn(_("%s not added!\n") % f)
542 542 else:
543 543 self.dirstate.forget([f])
544 544
545 545 def remove(self, list, unlink=False):
546 546 if unlink:
547 547 for f in list:
548 548 try:
549 549 util.unlink(self.wjoin(f))
550 550 except OSError, inst:
551 551 if inst.errno != errno.ENOENT: raise
552 552 for f in list:
553 553 p = self.wjoin(f)
554 554 if os.path.exists(p):
555 555 self.ui.warn(_("%s still exists!\n") % f)
556 556 elif self.dirstate.state(f) == 'a':
557 557 self.ui.warn(_("%s never committed!\n") % f)
558 558 self.dirstate.forget([f])
559 559 elif f not in self.dirstate:
560 560 self.ui.warn(_("%s not tracked!\n") % f)
561 561 else:
562 562 self.dirstate.update([f], "r")
563 563
564 564 def undelete(self, list):
565 565 p = self.dirstate.parents()[0]
566 566 mn = self.changelog.read(p)[0]
567 567 mf = self.manifest.readflags(mn)
568 568 m = self.manifest.read(mn)
569 569 for f in list:
570 570 if self.dirstate.state(f) not in "r":
571 571 self.ui.warn("%s not removed!\n" % f)
572 572 else:
573 573 t = self.file(f).read(m[f])
574 574 self.wwrite(f, t)
575 575 util.set_exec(self.wjoin(f), mf[f])
576 576 self.dirstate.update([f], "n")
577 577
578 578 def copy(self, source, dest):
579 579 p = self.wjoin(dest)
580 580 if not os.path.exists(p):
581 581 self.ui.warn(_("%s does not exist!\n") % dest)
582 582 elif not os.path.isfile(p):
583 583 self.ui.warn(_("copy failed: %s is not a file\n") % dest)
584 584 else:
585 585 if self.dirstate.state(dest) == '?':
586 586 self.dirstate.update([dest], "a")
587 587 self.dirstate.copy(source, dest)
588 588
589 589 def heads(self):
590 590 return self.changelog.heads()
591 591
592 592 # branchlookup returns a dict giving a list of branches for
593 593 # each head. A branch is defined as the tag of a node or
594 594 # the branch of the node's parents. If a node has multiple
595 595 # branch tags, tags are eliminated if they are visible from other
596 596 # branch tags.
597 597 #
598 598 # So, for this graph: a->b->c->d->e
599 599 # \ /
600 600 # aa -----/
601 601 # a has tag 2.6.12
602 602 # d has tag 2.6.13
603 603 # e would have branch tags for 2.6.12 and 2.6.13. Because the node
604 604 # for 2.6.12 can be reached from the node 2.6.13, that is eliminated
605 605 # from the list.
606 606 #
607 607 # It is possible that more than one head will have the same branch tag.
608 608 # callers need to check the result for multiple heads under the same
609 609 # branch tag if that is a problem for them (ie checkout of a specific
610 610 # branch).
611 611 #
612 612 # passing in a specific branch will limit the depth of the search
613 613 # through the parents. It won't limit the branches returned in the
614 614 # result though.
615 615 def branchlookup(self, heads=None, branch=None):
616 616 if not heads:
617 617 heads = self.heads()
618 618 headt = [ h for h in heads ]
619 619 chlog = self.changelog
620 620 branches = {}
621 621 merges = []
622 622 seenmerge = {}
623 623
624 624 # traverse the tree once for each head, recording in the branches
625 625 # dict which tags are visible from this head. The branches
626 626 # dict also records which tags are visible from each tag
627 627 # while we traverse.
628 628 while headt or merges:
629 629 if merges:
630 630 n, found = merges.pop()
631 631 visit = [n]
632 632 else:
633 633 h = headt.pop()
634 634 visit = [h]
635 635 found = [h]
636 636 seen = {}
637 637 while visit:
638 638 n = visit.pop()
639 639 if n in seen:
640 640 continue
641 641 pp = chlog.parents(n)
642 642 tags = self.nodetags(n)
643 643 if tags:
644 644 for x in tags:
645 645 if x == 'tip':
646 646 continue
647 647 for f in found:
648 648 branches.setdefault(f, {})[n] = 1
649 649 branches.setdefault(n, {})[n] = 1
650 650 break
651 651 if n not in found:
652 652 found.append(n)
653 653 if branch in tags:
654 654 continue
655 655 seen[n] = 1
656 656 if pp[1] != nullid and n not in seenmerge:
657 657 merges.append((pp[1], [x for x in found]))
658 658 seenmerge[n] = 1
659 659 if pp[0] != nullid:
660 660 visit.append(pp[0])
661 661 # traverse the branches dict, eliminating branch tags from each
662 662 # head that are visible from another branch tag for that head.
663 663 out = {}
664 664 viscache = {}
665 665 for h in heads:
666 666 def visible(node):
667 667 if node in viscache:
668 668 return viscache[node]
669 669 ret = {}
670 670 visit = [node]
671 671 while visit:
672 672 x = visit.pop()
673 673 if x in viscache:
674 674 ret.update(viscache[x])
675 675 elif x not in ret:
676 676 ret[x] = 1
677 677 if x in branches:
678 678 visit[len(visit):] = branches[x].keys()
679 679 viscache[node] = ret
680 680 return ret
681 681 if h not in branches:
682 682 continue
683 683 # O(n^2), but somewhat limited. This only searches the
684 684 # tags visible from a specific head, not all the tags in the
685 685 # whole repo.
686 686 for b in branches[h]:
687 687 vis = False
688 688 for bb in branches[h].keys():
689 689 if b != bb:
690 690 if b in visible(bb):
691 691 vis = True
692 692 break
693 693 if not vis:
694 694 l = out.setdefault(h, [])
695 695 l[len(l):] = self.nodetags(b)
696 696 return out
697 697
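
To make the elimination rule above concrete, here is the comment's example graph in miniature: a tag survives as a head's branch tag only if it is not reachable from another tagged node visible from that head (toy code, not the branchlookup implementation):

# a -> b -> c -> d -> e, with aa branching off a and merging before e
parents = {'e': ['d', 'aa'], 'd': ['c'], 'c': ['b'], 'b': ['a'],
           'aa': ['a'], 'a': []}
tags = {'a': '2.6.12', 'd': '2.6.13'}

def ancestors(n):
    seen, stack = set(), [n]
    while stack:
        x = stack.pop()
        if x not in seen:
            seen.add(x)
            stack.extend(parents[x])
    return seen

tagged = [n for n in ancestors('e') if n in tags]
# keep a tag only if no other tagged node visible from the head reaches it
kept = [tags[n] for n in tagged
        if not any(n in ancestors(m) for m in tagged if m != n)]
# kept == ['2.6.13']: 2.6.12 is visible from 2.6.13, so it is eliminated
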
698 698 def branches(self, nodes):
699 699 if not nodes: nodes = [self.changelog.tip()]
700 700 b = []
701 701 for n in nodes:
702 702 t = n
703 703 while n:
704 704 p = self.changelog.parents(n)
705 705 if p[1] != nullid or p[0] == nullid:
706 706 b.append((t, n, p[0], p[1]))
707 707 break
708 708 n = p[0]
709 709 return b
710 710
711 711 def between(self, pairs):
712 712 r = []
713 713
714 714 for top, bottom in pairs:
715 715 n, l, i = top, [], 0
716 716 f = 1
717 717
718 718 while n != bottom:
719 719 p = self.changelog.parents(n)[0]
720 720 if i == f:
721 721 l.append(n)
722 722 f = f * 2
723 723 n = p
724 724 i += 1
725 725
726 726 r.append(l)
727 727
728 728 return r
729 729
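
between() samples the path from each top node toward its bottom at exponentially growing distances (1, 2, 4, 8, ...); findincoming() later binary-searches these samples for the first unknown changeset. A standalone sketch of the sampling (sample_between is hypothetical):

def sample_between(top, bottom, parent):
    # parent: function mapping a node to its first parent
    n, l, i, f = top, [], 0, 1
    while n != bottom:
        if i == f:                    # record nodes at distances 1, 2, 4, 8, ...
            l.append(n)
            f *= 2
        n = parent(n)
        i += 1
    return l

# On a linear chain 10 -> 9 -> ... -> 0:
# sample_between(10, 0, lambda n: n - 1) == [9, 8, 6, 2]
# i.e. the nodes 1, 2, 4 and 8 steps below the top.
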
730 730 def findincoming(self, remote, base=None, heads=None):
731 731 m = self.changelog.nodemap
732 732 search = []
733 733 fetch = {}
734 734 seen = {}
735 735 seenbranch = {}
736 736 if base == None:
737 737 base = {}
738 738
739 739 # assume we're closer to the tip than the root
740 740 # and start by examining the heads
741 741 self.ui.status(_("searching for changes\n"))
742 742
743 743 if not heads:
744 744 heads = remote.heads()
745 745
746 746 unknown = []
747 747 for h in heads:
748 748 if h not in m:
749 749 unknown.append(h)
750 750 else:
751 751 base[h] = 1
752 752
753 753 if not unknown:
754 754 return None
755 755
756 756 rep = {}
757 757 reqcnt = 0
758 758
759 759 # search through remote branches
760 760 # a 'branch' here is a linear segment of history, with four parts:
761 761 # head, root, first parent, second parent
762 762 # (a branch always has two parents (or none) by definition)
763 763 unknown = remote.branches(unknown)
764 764 while unknown:
765 765 r = []
766 766 while unknown:
767 767 n = unknown.pop(0)
768 768 if n[0] in seen:
769 769 continue
770 770
771 771 self.ui.debug(_("examining %s:%s\n") % (short(n[0]), short(n[1])))
772 772 if n[0] == nullid:
773 773 break
774 774 if n in seenbranch:
775 775 self.ui.debug(_("branch already found\n"))
776 776 continue
777 777 if n[1] and n[1] in m: # do we know the base?
778 778 self.ui.debug(_("found incomplete branch %s:%s\n")
779 779 % (short(n[0]), short(n[1])))
780 780 search.append(n) # schedule branch range for scanning
781 781 seenbranch[n] = 1
782 782 else:
783 783 if n[1] not in seen and n[1] not in fetch:
784 784 if n[2] in m and n[3] in m:
785 785 self.ui.debug(_("found new changeset %s\n") %
786 786 short(n[1]))
787 787 fetch[n[1]] = 1 # earliest unknown
788 788 base[n[2]] = 1 # latest known
789 789 continue
790 790
791 791 for a in n[2:4]:
792 792 if a not in rep:
793 793 r.append(a)
794 794 rep[a] = 1
795 795
796 796 seen[n[0]] = 1
797 797
798 798 if r:
799 799 reqcnt += 1
800 800 self.ui.debug(_("request %d: %s\n") %
801 801 (reqcnt, " ".join(map(short, r))))
802 802 for p in range(0, len(r), 10):
803 803 for b in remote.branches(r[p:p+10]):
804 804 self.ui.debug(_("received %s:%s\n") %
805 805 (short(b[0]), short(b[1])))
806 806 if b[0] in m:
807 807 self.ui.debug(_("found base node %s\n") % short(b[0]))
808 808 base[b[0]] = 1
809 809 elif b[0] not in seen:
810 810 unknown.append(b)
811 811
812 812 # do binary search on the branches we found
813 813 while search:
814 814 n = search.pop(0)
815 815 reqcnt += 1
816 816 l = remote.between([(n[0], n[1])])[0]
817 817 l.append(n[1])
818 818 p = n[0]
819 819 f = 1
820 820 for i in l:
821 821 self.ui.debug(_("narrowing %d:%d %s\n") % (f, len(l), short(i)))
822 822 if i in m:
823 823 if f <= 2:
824 824 self.ui.debug(_("found new branch changeset %s\n") %
825 825 short(p))
826 826 fetch[p] = 1
827 827 base[i] = 1
828 828 else:
829 829 self.ui.debug(_("narrowed branch search to %s:%s\n")
830 830 % (short(p), short(i)))
831 831 search.append((p, i))
832 832 break
833 833 p, f = i, f * 2
834 834
835 835 # sanity check our fetch list
836 836 for f in fetch.keys():
837 837 if f in m:
838 838 raise repo.RepoError(_("already have changeset ") + short(f[:4]))
839 839
840 840 if base.keys() == [nullid]:
841 841 self.ui.warn(_("warning: pulling from an unrelated repository!\n"))
842 842
843 843 self.ui.note(_("found new changesets starting at ") +
844 844 " ".join([short(f) for f in fetch]) + "\n")
845 845
846 846 self.ui.debug(_("%d total queries\n") % reqcnt)
847 847
848 848 return fetch.keys()
849 849
850 850 def findoutgoing(self, remote, base=None, heads=None):
851 851 if base == None:
852 852 base = {}
853 853 self.findincoming(remote, base, heads)
854 854
855 855 self.ui.debug(_("common changesets up to ")
856 856 + " ".join(map(short, base.keys())) + "\n")
857 857
858 858 remain = dict.fromkeys(self.changelog.nodemap)
859 859
860 860 # prune everything remote has from the tree
861 861 del remain[nullid]
862 862 remove = base.keys()
863 863 while remove:
864 864 n = remove.pop(0)
865 865 if n in remain:
866 866 del remain[n]
867 867 for p in self.changelog.parents(n):
868 868 remove.append(p)
869 869
870 870 # find every node whose parents have been pruned
871 871 subset = []
872 872 for n in remain:
873 873 p1, p2 = self.changelog.parents(n)
874 874 if p1 not in remain and p2 not in remain:
875 875 subset.append(n)
876 876
877 877 # this is the set of all roots we have to push
878 878 return subset
879 879
880 880 def pull(self, remote, heads = None):
881 881 lock = self.lock()
882 882
883 883 # if we have an empty repo, fetch everything
884 884 if self.changelog.tip() == nullid:
885 885 self.ui.status(_("requesting all changes\n"))
886 886 fetch = [nullid]
887 887 else:
888 888 fetch = self.findincoming(remote)
889 889
890 890 if not fetch:
891 891 self.ui.status(_("no changes found\n"))
892 892 return 1
893 893
894 894 if heads is None:
895 895 cg = remote.changegroup(fetch)
896 896 else:
897 897 cg = remote.changegroupsubset(fetch, heads)
898 898 return self.addchangegroup(cg)
899 899
900 900 def push(self, remote, force=False):
901 901 lock = remote.lock()
902 902
903 903 base = {}
904 904 heads = remote.heads()
905 905 inc = self.findincoming(remote, base, heads)
906 906 if not force and inc:
907 907 self.ui.warn(_("abort: unsynced remote changes!\n"))
908 908 self.ui.status(_("(did you forget to sync? use push -f to force)\n"))
909 909 return 1
910 910
911 911 update = self.findoutgoing(remote, base)
912 912 if not update:
913 913 self.ui.status(_("no changes found\n"))
914 914 return 1
915 915 elif not force:
916 916 if len(heads) < len(self.changelog.heads()):
917 917 self.ui.warn(_("abort: push creates new remote branches!\n"))
918 918 self.ui.status(_("(did you forget to merge?"
919 919 " use push -f to force)\n"))
920 920 return 1
921 921
922 922 cg = self.changegroup(update)
923 923 return remote.addchangegroup(cg)
924 924
925 925 def changegroupsubset(self, bases, heads):
926 926 """This function generates a changegroup consisting of all the nodes
927 927 that are descendants of any of the bases, and ancestors of any of
928 928 the heads.
929 929
930 930 It is fairly complex as determining which filenodes and which
931 931 manifest nodes need to be included for the changeset to be complete
932 932 is non-trivial.
933 933
934 934 Another wrinkle is doing the reverse, figuring out which changeset in
935 935 the changegroup a particular filenode or manifestnode belongs to."""
936 936
937 937 # Set up some initial variables
938 938 # Make it easy to refer to self.changelog
939 939 cl = self.changelog
940 940 # msng is short for missing - compute the list of changesets in this
941 941 # changegroup.
942 942 msng_cl_lst, bases, heads = cl.nodesbetween(bases, heads)
943 943 # Some bases may turn out to be superfluous, and some heads may be
944 944 # too. nodesbetween will return the minimal set of bases and heads
945 945 # necessary to re-create the changegroup.
946 946
947 947 # Known heads are the list of heads that it is assumed the recipient
948 948 # of this changegroup will know about.
949 949 knownheads = {}
950 950 # We assume that all parents of bases are known heads.
951 951 for n in bases:
952 952 for p in cl.parents(n):
953 953 if p != nullid:
954 954 knownheads[p] = 1
955 955 knownheads = knownheads.keys()
956 956 if knownheads:
957 957 # Now that we know what heads are known, we can compute which
958 958 # changesets are known. The recipient must know about all
959 959 # changesets required to reach the known heads from the null
960 960 # changeset.
961 961 has_cl_set, junk, junk = cl.nodesbetween(None, knownheads)
962 962 junk = None
963 963 # Transform the list into an ersatz set.
964 964 has_cl_set = dict.fromkeys(has_cl_set)
965 965 else:
966 966 # If there were no known heads, the recipient cannot be assumed to
967 967 # know about any changesets.
968 968 has_cl_set = {}
969 969
970 970 # Make it easy to refer to self.manifest
971 971 mnfst = self.manifest
972 972 # We don't know which manifests are missing yet
973 973 msng_mnfst_set = {}
974 974 # Nor do we know which filenodes are missing.
975 975 msng_filenode_set = {}
976 976
977 977 junk = mnfst.index[mnfst.count() - 1] # Get around a bug in lazyindex
978 978 junk = None
979 979
980 980 # A changeset always belongs to itself, so the changenode lookup
981 981 # function for a changenode is identity.
982 982 def identity(x):
983 983 return x
984 984
985 985 # A function generating function. Sets up an environment for the
986 986 # inner function.
987 987 def cmp_by_rev_func(revlog):
988 988 # Compare two nodes by their revision number in the environment's
989 989 # revision history. Since the revision number both represents the
990 990 # most efficient order to read the nodes in, and represents a
991 991 # topological sorting of the nodes, this function is often useful.
992 992 def cmp_by_rev(a, b):
993 993 return cmp(revlog.rev(a), revlog.rev(b))
994 994 return cmp_by_rev
995 995
996 996 # If we determine that a particular file or manifest node must be a
997 997 # node that the recipient of the changegroup will already have, we can
998 998 # also assume the recipient will have all the parents. This function
999 999 # prunes them from the set of missing nodes.
1000 1000 def prune_parents(revlog, hasset, msngset):
1001 1001 haslst = hasset.keys()
1002 1002 haslst.sort(cmp_by_rev_func(revlog))
1003 1003 for node in haslst:
1004 1004 parentlst = [p for p in revlog.parents(node) if p != nullid]
1005 1005 while parentlst:
1006 1006 n = parentlst.pop()
1007 1007 if n not in hasset:
1008 1008 hasset[n] = 1
1009 1009 p = [p for p in revlog.parents(n) if p != nullid]
1010 1010 parentlst.extend(p)
1011 1011 for n in hasset:
1012 1012 msngset.pop(n, None)
1013 1013
1014 1014 # This is a function generating function used to set up an environment
1015 1015 # for the inner function to execute in.
1016 1016 def manifest_and_file_collector(changedfileset):
1017 1017 # This is an information gathering function that gathers
1018 1018 # information from each changeset node that goes out as part of
1019 1019 # the changegroup. The information gathered is a list of which
1020 1020 # manifest nodes are potentially required (the recipient may
1021 1021 # already have them) and total list of all files which were
1022 1022 # changed in any changeset in the changegroup.
1023 1023 #
1024 1024 # We also remember the first changenode we saw any manifest
1025 1025 # referenced by so we can later determine which changenode 'owns'
1026 1026 # the manifest.
1027 1027 def collect_manifests_and_files(clnode):
1028 1028 c = cl.read(clnode)
1029 1029 for f in c[3]:
1030 1030 # This is to make sure we only have one instance of each
1031 1031 # filename string for each filename.
1032 1032 changedfileset.setdefault(f, f)
1033 1033 msng_mnfst_set.setdefault(c[0], clnode)
1034 1034 return collect_manifests_and_files
1035 1035
1036 1036 # Figure out which manifest nodes (of the ones we think might be part
1037 1037 # of the changegroup) the recipient must know about and remove them
1038 1038 # from the changegroup.
1039 1039 def prune_manifests():
1040 1040 has_mnfst_set = {}
1041 1041 for n in msng_mnfst_set:
1042 1042 # If a 'missing' manifest thinks it belongs to a changenode
1043 1043 # the recipient is assumed to have, obviously the recipient
1044 1044 # must have that manifest.
1045 1045 linknode = cl.node(mnfst.linkrev(n))
1046 1046 if linknode in has_cl_set:
1047 1047 has_mnfst_set[n] = 1
1048 1048 prune_parents(mnfst, has_mnfst_set, msng_mnfst_set)
1049 1049
1050 1050 # Use the information collected in collect_manifests_and_files to say
1051 1051 # which changenode any manifestnode belongs to.
1052 1052 def lookup_manifest_link(mnfstnode):
1053 1053 return msng_mnfst_set[mnfstnode]
1054 1054
1055 1055 # A function generating function that sets up the initial environment
1056 1056 # the environment for the inner function.
1057 1057 def filenode_collector(changedfiles):
1058 1058 next_rev = [0]
1059 1059 # This gathers information from each manifestnode included in the
1060 1060 # changegroup about which filenodes the manifest node references
1061 1061 # so we can include those in the changegroup too.
1062 1062 #
1063 1063 # It also remembers which changenode each filenode belongs to. It
1064 1064 # does this by assuming that a filenode belongs to the changenode
1065 1065 # that the first manifest referencing it belongs to.
1066 1066 def collect_msng_filenodes(mnfstnode):
1067 1067 r = mnfst.rev(mnfstnode)
1068 1068 if r == next_rev[0]:
1069 1069 # If the last rev we looked at was the one just previous,
1070 1070 # we only need to see a diff.
1071 1071 delta = mdiff.patchtext(mnfst.delta(mnfstnode))
1072 1072 # For each line in the delta
1073 1073 for dline in delta.splitlines():
1074 1074 # get the filename and filenode for that line
1075 1075 f, fnode = dline.split('\0')
1076 1076 fnode = bin(fnode[:40])
1077 1077 f = changedfiles.get(f, None)
1078 1078 # And if the file is in the list of files we care
1079 1079 # about.
1080 1080 if f is not None:
1081 1081 # Get the changenode this manifest belongs to
1082 1082 clnode = msng_mnfst_set[mnfstnode]
1083 1083 # Create the set of filenodes for the file if
1084 1084 # there isn't one already.
1085 1085 ndset = msng_filenode_set.setdefault(f, {})
1086 1086 # And set the filenode's changelog node to the
1087 1087 # manifest's if it hasn't been set already.
1088 1088 ndset.setdefault(fnode, clnode)
1089 1089 else:
1090 1090 # Otherwise we need a full manifest.
1091 1091 m = mnfst.read(mnfstnode)
1092 1092 # For every file we care about.
1093 1093 for f in changedfiles:
1094 1094 fnode = m.get(f, None)
1095 1095 # If it's in the manifest
1096 1096 if fnode is not None:
1097 1097 # See comments above.
1098 1098 clnode = msng_mnfst_set[mnfstnode]
1099 1099 ndset = msng_filenode_set.setdefault(f, {})
1100 1100 ndset.setdefault(fnode, clnode)
1101 1101 # Remember the revision we hope to see next.
1102 1102 next_rev[0] = r + 1
1103 1103 return collect_msng_filenodes
1104 1104
1105 1105 # We have a list of filenodes we think we need for a file, let's remove
1106 1106 # all those we know the recipient must have.
1107 1107 def prune_filenodes(f, filerevlog):
1108 1108 msngset = msng_filenode_set[f]
1109 1109 hasset = {}
1110 1110 # If a 'missing' filenode thinks it belongs to a changenode we
1111 1111 # assume the recipient must have, then the recipient must have
1112 1112 # that filenode.
1113 1113 for n in msngset:
1114 1114 clnode = cl.node(filerevlog.linkrev(n))
1115 1115 if clnode in has_cl_set:
1116 1116 hasset[n] = 1
1117 1117 prune_parents(filerevlog, hasset, msngset)
1118 1118
1119 1119 # A function generating function that sets up a context for the
1120 1120 # inner function.
1121 1121 def lookup_filenode_link_func(fname):
1122 1122 msngset = msng_filenode_set[fname]
1123 1123 # Lookup the changenode the filenode belongs to.
1124 1124 def lookup_filenode_link(fnode):
1125 1125 return msngset[fnode]
1126 1126 return lookup_filenode_link
1127 1127
1128 1128 # Now that we have all these utility functions to help out and
1129 1129 # logically divide up the task, generate the group.
1130 1130 def gengroup():
1131 1131 # The set of changed files starts empty.
1132 1132 changedfiles = {}
1133 1133 # Create a changenode group generator that will call our functions
1134 1134 # back to lookup the owning changenode and collect information.
1135 1135 group = cl.group(msng_cl_lst, identity,
1136 1136 manifest_and_file_collector(changedfiles))
1137 1137 for chnk in group:
1138 1138 yield chnk
1139 1139
1140 1140 # The list of manifests has been collected by the generator
1141 1141 # calling our functions back.
1142 1142 prune_manifests()
1143 1143 msng_mnfst_lst = msng_mnfst_set.keys()
1144 1144 # Sort the manifestnodes by revision number.
1145 1145 msng_mnfst_lst.sort(cmp_by_rev_func(mnfst))
1146 1146 # Create a generator for the manifestnodes that calls our lookup
1147 1147 # and data collection functions back.
1148 1148 group = mnfst.group(msng_mnfst_lst, lookup_manifest_link,
1149 1149 filenode_collector(changedfiles))
1150 1150 for chnk in group:
1151 1151 yield chnk
1152 1152
1153 1153 # These are no longer needed, dereference and toss the memory for
1154 1154 # them.
1155 1155 msng_mnfst_lst = None
1156 1156 msng_mnfst_set.clear()
1157 1157
1158 1158 changedfiles = changedfiles.keys()
1159 1159 changedfiles.sort()
1160 1160 # Go through all our files in order sorted by name.
1161 1161 for fname in changedfiles:
1162 1162 filerevlog = self.file(fname)
1163 1163 # Toss out the filenodes that the recipient isn't really
1164 1164 # missing.
1165 1165 prune_filenodes(fname, filerevlog)
1166 1166 msng_filenode_lst = msng_filenode_set[fname].keys()
1167 1167 # If any filenodes are left, generate the group for them,
1168 1168 # otherwise don't bother.
1169 1169 if len(msng_filenode_lst) > 0:
1170 1170 yield struct.pack(">l", len(fname) + 4) + fname
1171 1171 # Sort the filenodes by their revision #
1172 1172 msng_filenode_lst.sort(cmp_by_rev_func(filerevlog))
1173 1173 # Create a group generator and only pass in a changenode
1174 1174 # lookup function as we need to collect no information
1175 1175 # from filenodes.
1176 1176 group = filerevlog.group(msng_filenode_lst,
1177 1177 lookup_filenode_link_func(fname))
1178 1178 for chnk in group:
1179 1179 yield chnk
1180 1180 # Don't need this anymore, toss it to free memory.
1181 1181 del msng_filenode_set[fname]
1182 1182 # Signal that no more groups are left.
1183 1183 yield struct.pack(">l", 0)
1184 1184
1185 1185 return util.chunkbuffer(gengroup())
1186 1186
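
The key pruning insight in changegroupsubset() is that once a node is known to exist on the recipient, all of its ancestors must exist there too, so they can be dropped from the missing set. A standalone sketch of that prune_parents idea over a plain parent map (not the revlog-based original):

def prune_parents(parents, hasset, msngset):
    # parents: dict node -> list of parent nodes
    # hasset, msngset: dicts used as sets, as in the code above
    stack = list(hasset)
    while stack:
        n = stack.pop()
        for p in parents.get(n, []):
            if p not in hasset:       # every ancestor of a known node is known
                hasset[p] = 1
                stack.append(p)
    for n in hasset:
        msngset.pop(n, None)

# parents = {'c': ['b'], 'b': ['a'], 'a': []}
# missing = {'a': 1, 'b': 1, 'c': 1}; known = {'b': 1}
# prune_parents(parents, known, missing)  ->  missing == {'c': 1}
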
1187 1187 def changegroup(self, basenodes):
1188 1188 """Generate a changegroup of all nodes that we have that a recipient
1189 1189 doesn't.
1190 1190
1191 1191 This is much easier than the previous function as we can assume that
1192 1192 the recipient has any changenode we aren't sending them."""
1193 1193 cl = self.changelog
1194 1194 nodes = cl.nodesbetween(basenodes, None)[0]
1195 1195 revset = dict.fromkeys([cl.rev(n) for n in nodes])
1196 1196
1197 1197 def identity(x):
1198 1198 return x
1199 1199
1200 1200 def gennodelst(revlog):
1201 1201 for r in xrange(0, revlog.count()):
1202 1202 n = revlog.node(r)
1203 1203 if revlog.linkrev(n) in revset:
1204 1204 yield n
1205 1205
1206 1206 def changed_file_collector(changedfileset):
1207 1207 def collect_changed_files(clnode):
1208 1208 c = cl.read(clnode)
1209 1209 for fname in c[3]:
1210 1210 changedfileset[fname] = 1
1211 1211 return collect_changed_files
1212 1212
1213 1213 def lookuprevlink_func(revlog):
1214 1214 def lookuprevlink(n):
1215 1215 return cl.node(revlog.linkrev(n))
1216 1216 return lookuprevlink
1217 1217
1218 1218 def gengroup():
1219 1219 # construct a list of all changed files
1220 1220 changedfiles = {}
1221 1221
1222 1222 for chnk in cl.group(nodes, identity,
1223 1223 changed_file_collector(changedfiles)):
1224 1224 yield chnk
1225 1225 changedfiles = changedfiles.keys()
1226 1226 changedfiles.sort()
1227 1227
1228 1228 mnfst = self.manifest
1229 1229 nodeiter = gennodelst(mnfst)
1230 1230 for chnk in mnfst.group(nodeiter, lookuprevlink_func(mnfst)):
1231 1231 yield chnk
1232 1232
1233 1233 for fname in changedfiles:
1234 1234 filerevlog = self.file(fname)
1235 1235 nodeiter = gennodelst(filerevlog)
1236 1236 nodeiter = list(nodeiter)
1237 1237 if nodeiter:
1238 1238 yield struct.pack(">l", len(fname) + 4) + fname
1239 1239 lookup = lookuprevlink_func(filerevlog)
1240 1240 for chnk in filerevlog.group(nodeiter, lookup):
1241 1241 yield chnk
1242 1242
1243 1243 yield struct.pack(">l", 0)
1244 1244
1245 1245 return util.chunkbuffer(gengroup())
1246 1246
1247 1247 def addchangegroup(self, source):
1248 1248
1249 1249 def getchunk():
1250 1250 d = source.read(4)
1251 1251 if not d: return ""
1252 1252 l = struct.unpack(">l", d)[0]
1253 1253 if l <= 4: return ""
1254 1254 d = source.read(l - 4)
1255 1255 if len(d) < l - 4:
1256 1256 raise repo.RepoError(_("premature EOF reading chunk"
1257 1257 " (got %d bytes, expected %d)")
1258 1258 % (len(d), l - 4))
1259 1259 return d
1260 1260
1261 1261 def getgroup():
1262 1262 while 1:
1263 1263 c = getchunk()
1264 1264 if not c: break
1265 1265 yield c
1266 1266
1267 1267 def csmap(x):
1268 1268 self.ui.debug(_("add changeset %s\n") % short(x))
1269 1269 return self.changelog.count()
1270 1270
1271 1271 def revmap(x):
1272 1272 return self.changelog.rev(x)
1273 1273
1274 1274 if not source: return
1275 1275 changesets = files = revisions = 0
1276 1276
1277 1277 tr = self.transaction()
1278 1278
1279 1279 oldheads = len(self.changelog.heads())
1280 1280
1281 1281 # pull off the changeset group
1282 1282 self.ui.status(_("adding changesets\n"))
1283 1283 co = self.changelog.tip()
1284 1284 cn = self.changelog.addgroup(getgroup(), csmap, tr, 1) # unique
1285 1285 cnr, cor = map(self.changelog.rev, (cn, co))
1286 1286 if cn == nullid:
1287 1287 cnr = cor
1288 1288 changesets = cnr - cor
1289 1289
1290 1290 # pull off the manifest group
1291 1291 self.ui.status(_("adding manifests\n"))
1292 1292 mm = self.manifest.tip()
1293 1293 mo = self.manifest.addgroup(getgroup(), revmap, tr)
1294 1294
1295 1295 # process the files
1296 1296 self.ui.status(_("adding file changes\n"))
1297 1297 while 1:
1298 1298 f = getchunk()
1299 1299 if not f: break
1300 1300 self.ui.debug(_("adding %s revisions\n") % f)
1301 1301 fl = self.file(f)
1302 1302 o = fl.count()
1303 1303 n = fl.addgroup(getgroup(), revmap, tr)
1304 1304 revisions += fl.count() - o
1305 1305 files += 1
1306 1306
1307 1307 newheads = len(self.changelog.heads())
1308 1308 heads = ""
1309 1309 if oldheads and newheads > oldheads:
1310 1310 heads = _(" (+%d heads)") % (newheads - oldheads)
1311 1311
1312 1312 self.ui.status(_("added %d changesets"
1313 1313 " with %d changes to %d files%s\n")
1314 1314 % (changesets, revisions, files, heads))
1315 1315
1316 1316 tr.close()
1317 1317
1318 1318 if changesets > 0:
1319 1319 if not self.hook("changegroup",
1320 1320 node=hex(self.changelog.node(cor+1))):
1321 1321 self.ui.warn(_("abort: changegroup hook returned failure!\n"))
1322 1322 return 1
1323 1323
1324 1324 for i in range(cor + 1, cnr + 1):
1325 1325 self.hook("commit", node=hex(self.changelog.node(i)))
1326 1326
1327 1327 return
1328 1328
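
Both changegroup generators and getchunk() above share one wire format: a big-endian 4-byte length that includes the length field itself, with a length of zero (anything <= 4) terminating a group. A round-trip sketch (write_chunk and read_chunks are hypothetical helpers):

import struct, io

def write_chunk(out, data):
    out.write(struct.pack(">l", len(data) + 4) + data)

def read_chunks(src):
    while True:
        d = src.read(4)
        if not d:
            break
        l = struct.unpack(">l", d)[0]
        if l <= 4:
            break                     # empty chunk terminates the stream
        yield src.read(l - 4)

buf = io.BytesIO()
write_chunk(buf, b"hello")
buf.write(struct.pack(">l", 0))       # group terminator, as in gengroup()
buf.seek(0)
assert list(read_chunks(buf)) == [b"hello"]
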
1329 1329 def update(self, node, allow=False, force=False, choose=None,
1330 1330 moddirstate=True):
1331 1331 pl = self.dirstate.parents()
1332 1332 if not force and pl[1] != nullid:
1333 1333 self.ui.warn(_("aborting: outstanding uncommitted merges\n"))
1334 1334 return 1
1335 1335
1336 1336 p1, p2 = pl[0], node
1337 1337 pa = self.changelog.ancestor(p1, p2)
1338 1338 m1n = self.changelog.read(p1)[0]
1339 1339 m2n = self.changelog.read(p2)[0]
1340 1340 man = self.manifest.ancestor(m1n, m2n)
1341 1341 m1 = self.manifest.read(m1n)
1342 1342 mf1 = self.manifest.readflags(m1n)
1343 1343 m2 = self.manifest.read(m2n)
1344 1344 mf2 = self.manifest.readflags(m2n)
1345 1345 ma = self.manifest.read(man)
1346 1346 mfa = self.manifest.readflags(man)
1347 1347
1348 1348 (c, a, d, u) = self.changes()
1349 1349
1350 1350 # is this a jump, or a merge? i.e. is there a linear path
1351 1351 # from p1 to p2?
1352 1352 linear_path = (pa == p1 or pa == p2)
1353 1353
1354 1354 # resolve the manifest to determine which files
1355 1355 # we care about merging
1356 1356 self.ui.note(_("resolving manifests\n"))
1357 1357 self.ui.debug(_(" force %s allow %s moddirstate %s linear %s\n") %
1358 1358 (force, allow, moddirstate, linear_path))
1359 1359 self.ui.debug(_(" ancestor %s local %s remote %s\n") %
1360 1360 (short(man), short(m1n), short(m2n)))
1361 1361
1362 1362 merge = {}
1363 1363 get = {}
1364 1364 remove = []
1365 1365
1366 1366 # construct a working dir manifest
1367 1367 mw = m1.copy()
1368 1368 mfw = mf1.copy()
1369 1369 umap = dict.fromkeys(u)
1370 1370
1371 1371 for f in a + c + u:
1372 1372 mw[f] = ""
1373 1373 mfw[f] = util.is_exec(self.wjoin(f), mfw.get(f, False))
1374 1374
1375 1375 for f in d:
1376 1376 if f in mw: del mw[f]
1377 1377
1378 1378 # If we're jumping between revisions (as opposed to merging),
1379 1379 # and if neither the working directory nor the target rev has
1380 1380 # the file, then we need to remove it from the dirstate, to
1381 1381 # prevent the dirstate from listing the file when it is no
1382 1382 # longer in the manifest.
1383 1383 if moddirstate and linear_path and f not in m2:
1384 1384 self.dirstate.forget((f,))
1385 1385
1386 1386 # Compare manifests
1387 1387 for f, n in mw.iteritems():
1388 1388 if choose and not choose(f): continue
1389 1389 if f in m2:
1390 1390 s = 0
1391 1391
1392 1392 # is the wfile new since m1, and match m2?
1393 1393 if f not in m1:
1394 1394 t1 = self.wread(f)
1395 1395 t2 = self.file(f).read(m2[f])
1396 1396 if cmp(t1, t2) == 0:
1397 1397 n = m2[f]
1398 1398 del t1, t2
1399 1399
1400 1400 # are files different?
1401 1401 if n != m2[f]:
1402 1402 a = ma.get(f, nullid)
1403 1403 # are both different from the ancestor?
1404 1404 if n != a and m2[f] != a:
1405 1405 self.ui.debug(_(" %s versions differ, resolve\n") % f)
1406 1406 # merge executable bits
1407 1407 # "if we changed or they changed, change in merge"
1408 1408 a, b, c = mfa.get(f, 0), mfw[f], mf2[f]
1409 1409 mode = ((a^b) | (a^c)) ^ a
1410 1410 merge[f] = (m1.get(f, nullid), m2[f], mode)
1411 1411 s = 1
1412 1412 # are we clobbering?
1413 1413 # is remote's version newer?
1414 1414 # or are we going back in time?
1415 1415 elif force or m2[f] != a or (p2 == pa and mw[f] == m1[f]):
1416 1416 self.ui.debug(_(" remote %s is newer, get\n") % f)
1417 1417 get[f] = m2[f]
1418 1418 s = 1
1419 1419 elif f in umap:
1420 1420 # this unknown file is the same as the checkout
1421 1421 get[f] = m2[f]
1422 1422
1423 1423 if not s and mfw[f] != mf2[f]:
1424 1424 if force:
1425 1425 self.ui.debug(_(" updating permissions for %s\n") % f)
1426 1426 util.set_exec(self.wjoin(f), mf2[f])
1427 1427 else:
1428 1428 a, b, c = mfa.get(f, 0), mfw[f], mf2[f]
1429 1429 mode = ((a^b) | (a^c)) ^ a
1430 1430 if mode != b:
1431 1431 self.ui.debug(_(" updating permissions for %s\n") % f)
1432 1432 util.set_exec(self.wjoin(f), mode)
1433 1433 del m2[f]
1434 1434 elif f in ma:
1435 1435 if n != ma[f]:
1436 1436 r = _("d")
1437 1437 if not force and (linear_path or allow):
1438 1438 r = self.ui.prompt(
1439 1439 (_(" local changed %s which remote deleted\n") % f) +
1440 1440 _("(k)eep or (d)elete?"), _("[kd]"), _("k"))
1441 1441 if r == _("d"):
1442 1442 remove.append(f)
1443 1443 else:
1444 1444 self.ui.debug(_("other deleted %s\n") % f)
1445 1445 remove.append(f) # other deleted it
1446 1446 else:
1447 1447 # file is created on branch or in working directory
1448 1448 if force and f not in umap:
1449 1449 self.ui.debug(_("remote deleted %s, clobbering\n") % f)
1450 1450 remove.append(f)
1451 1451 elif n == m1.get(f, nullid): # same as parent
1452 1452 if p2 == pa: # going backwards?
1453 1453 self.ui.debug(_("remote deleted %s\n") % f)
1454 1454 remove.append(f)
1455 1455 else:
1456 1456 self.ui.debug(_("local modified %s, keeping\n") % f)
1457 1457 else:
1458 1458 self.ui.debug(_("working dir created %s, keeping\n") % f)
1459 1459
1460 1460 for f, n in m2.iteritems():
1461 1461 if choose and not choose(f): continue
1462 1462 if f[0] == "/": continue
1463 1463 if f in ma and n != ma[f]:
1464 1464 r = _("k")
1465 1465 if not force and (linear_path or allow):
1466 1466 r = self.ui.prompt(
1467 1467 (_("remote changed %s which local deleted\n") % f) +
1468 1468 _("(k)eep or (d)elete?"), _("[kd]"), _("k"))
1469 1469 if r == _("k"): get[f] = n
1470 1470 elif f not in ma:
1471 1471 self.ui.debug(_("remote created %s\n") % f)
1472 1472 get[f] = n
1473 1473 else:
1474 1474 if force or p2 == pa: # going backwards?
1475 1475 self.ui.debug(_("local deleted %s, recreating\n") % f)
1476 1476 get[f] = n
1477 1477 else:
1478 1478 self.ui.debug(_("local deleted %s\n") % f)
1479 1479
1480 1480 del mw, m1, m2, ma
1481 1481
1482 1482 if force:
1483 1483 for f in merge:
1484 1484 get[f] = merge[f][1]
1485 1485 merge = {}
1486 1486
1487 1487 if linear_path or force:
1488 1488 # we don't need to do any magic, just jump to the new rev
1489 1489 branch_merge = False
1490 1490 p1, p2 = p2, nullid
1491 1491 else:
1492 1492 if not allow:
1493 1493 self.ui.status(_("this update spans a branch"
1494 1494 " affecting the following files:\n"))
1495 1495 fl = merge.keys() + get.keys()
1496 1496 fl.sort()
1497 1497 for f in fl:
1498 1498 cf = ""
1499 1499 if f in merge: cf = _(" (resolve)")
1500 1500 self.ui.status(" %s%s\n" % (f, cf))
1501 1501 self.ui.warn(_("aborting update spanning branches!\n"))
1502 1502 self.ui.status(_("(use update -m to merge across branches"
1503 1503 " or -C to lose changes)\n"))
1504 1504 return 1
1505 1505 branch_merge = True
1506 1506
1507 1507 if moddirstate:
1508 1508 self.dirstate.setparents(p1, p2)
1509 1509
1510 1510 # get the files we don't need to change
1511 1511 files = get.keys()
1512 1512 files.sort()
1513 1513 for f in files:
1514 1514 if f[0] == "/": continue
1515 1515 self.ui.note(_("getting %s\n") % f)
1516 1516 t = self.file(f).read(get[f])
1517 1517 self.wwrite(f, t)
1518 1518 util.set_exec(self.wjoin(f), mf2[f])
1519 1519 if moddirstate:
1520 1520 if branch_merge:
1521 1521 self.dirstate.update([f], 'n', st_mtime=-1)
1522 1522 else:
1523 1523 self.dirstate.update([f], 'n')
1524 1524
1525 1525 # merge the tricky bits
1526 1526 files = merge.keys()
1527 1527 files.sort()
1528 1528 for f in files:
1529 1529 self.ui.status(_("merging %s\n") % f)
1530 1530 my, other, flag = merge[f]
1531 1531 self.merge3(f, my, other)
1532 1532 util.set_exec(self.wjoin(f), flag)
1533 1533 if moddirstate:
1534 1534 if branch_merge:
1535 1535 # We've done a branch merge, mark this file as merged
1536 1536 # so that we properly record the merger later
1537 1537 self.dirstate.update([f], 'm')
1538 1538 else:
1539 1539 # We've update-merged a locally modified file, so
1540 1540 # we set the dirstate to emulate a normal checkout
1541 1541 # of that file some time in the past. Thus our
1542 1542 # merge will appear as a normal local file
1543 1543 # modification.
1544 1544 f_len = len(self.file(f).read(other))
1545 1545 self.dirstate.update([f], 'n', st_size=f_len, st_mtime=-1)
1546 1546
1547 1547 remove.sort()
1548 1548 for f in remove:
1549 1549 self.ui.note(_("removing %s\n") % f)
1550 1550 try:
1551 1551 util.unlink(self.wjoin(f))
1552 1552 except OSError, inst:
1553 1553 if inst.errno != errno.ENOENT:
1554 1554 self.ui.warn(_("update failed to remove %s: %s!\n") %
1555 1555 (f, inst.strerror))
1556 1556 if moddirstate:
1557 1557 if branch_merge:
1558 1558 self.dirstate.update(remove, 'r')
1559 1559 else:
1560 1560 self.dirstate.forget(remove)
1561 1561
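
The executable-bit rule used twice in update() above, mode = ((a^b) | (a^c)) ^ a, encodes "if we changed it or they changed it, change it in the merge"; otherwise the ancestor's bit stands. A worked check:

def merge_exec_bit(a, b, c):
    # a: ancestor's exec bit, b: working copy's, c: remote's (each 0 or 1)
    return ((a ^ b) | (a ^ c)) ^ a

assert merge_exec_bit(0, 0, 0) == 0   # nobody changed it
assert merge_exec_bit(0, 1, 0) == 1   # we set it, so it stays set
assert merge_exec_bit(1, 1, 0) == 0   # they cleared it, so clear it
assert merge_exec_bit(1, 0, 1) == 0   # we cleared it, so it stays cleared
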
1562 1562 def merge3(self, fn, my, other):
1563 1563 """perform a 3-way merge in the working directory"""
1564 1564
1565 1565 def temp(prefix, node):
1566 1566 pre = "%s~%s." % (os.path.basename(fn), prefix)
1567 1567 (fd, name) = tempfile.mkstemp("", pre)
1568 1568 f = os.fdopen(fd, "wb")
1569 1569 self.wwrite(fn, fl.read(node), f)
1570 1570 f.close()
1571 1571 return name
1572 1572
1573 1573 fl = self.file(fn)
1574 1574 base = fl.ancestor(my, other)
1575 1575 a = self.wjoin(fn)
1576 1576 b = temp("base", base)
1577 1577 c = temp("other", other)
1578 1578
1579 1579 self.ui.note(_("resolving %s\n") % fn)
1580 1580 self.ui.debug(_("file %s: my %s other %s ancestor %s\n") %
1581 1581 (fn, short(my), short(other), short(base)))
1582 1582
1583 1583 cmd = (os.environ.get("HGMERGE") or self.ui.config("ui", "merge")
1584 1584 or "hgmerge")
1585 1585 r = os.system('%s "%s" "%s" "%s"' % (cmd, a, b, c))
1586 1586 if r:
1587 1587 self.ui.warn(_("merging %s failed!\n") % fn)
1588 1588
1589 1589 os.unlink(b)
1590 1590 os.unlink(c)
1591 1591
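
merge3() materializes the base and other revisions as temporary files and delegates to an external command (HGMERGE, the ui.merge setting, or hgmerge), reading success from the exit status. A hedged sketch of that flow (external_merge3 is hypothetical):

import os, subprocess, tempfile

def external_merge3(local_path, base_text, other_text, cmd="hgmerge"):
    def temp(prefix, text):
        fd, name = tempfile.mkstemp(prefix=prefix + ".")
        with os.fdopen(fd, "wb") as f:
            f.write(text)
        return name
    b = temp("base", base_text)
    c = temp("other", other_text)
    try:
        # merge tool gets local, base, other, like '%s "%s" "%s" "%s"' above
        r = subprocess.call([cmd, local_path, b, c])
        return r == 0                 # nonzero status means the merge failed
    finally:
        os.unlink(b)
        os.unlink(c)
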
1592 1592 def verify(self):
1593 1593 filelinkrevs = {}
1594 1594 filenodes = {}
1595 1595 changesets = revisions = files = 0
1596 1596 errors = [0]
1597 1597 neededmanifests = {}
1598 1598
1599 1599 def err(msg):
1600 1600 self.ui.warn(msg + "\n")
1601 1601 errors[0] += 1
1602 1602
1603 1603 seen = {}
1604 1604 self.ui.status(_("checking changesets\n"))
1605 d = self.changelog.checksize()
1606 if d:
1607 err(_("changeset data short %d bytes") % d)
1605 1608 for i in range(self.changelog.count()):
1606 1609 changesets += 1
1607 1610 n = self.changelog.node(i)
1608 1611 l = self.changelog.linkrev(n)
1609 1612 if l != i:
1610 1613 err(_("incorrect link (%d) for changeset revision %d") %(l, i))
1611 1614 if n in seen:
1612 1615 err(_("duplicate changeset at revision %d") % i)
1613 1616 seen[n] = 1
1614 1617
1615 1618 for p in self.changelog.parents(n):
1616 1619 if p not in self.changelog.nodemap:
1617 1620 err(_("changeset %s has unknown parent %s") %
1618 1621 (short(n), short(p)))
1619 1622 try:
1620 1623 changes = self.changelog.read(n)
1621 1624 except KeyboardInterrupt:
1622 1625 self.ui.warn(_("interrupted"))
1623 1626 raise
1624 1627 except Exception, inst:
1625 1628 err(_("unpacking changeset %s: %s") % (short(n), inst))
1626 1629
1627 1630 neededmanifests[changes[0]] = n
1628 1631
1629 1632 for f in changes[3]:
1630 1633 filelinkrevs.setdefault(f, []).append(i)
1631 1634
1632 1635 seen = {}
1633 1636 self.ui.status(_("checking manifests\n"))
1637 d = self.manifest.checksize()
1638 if d:
1639 err(_("manifest data short %d bytes") % d)
1634 1640 for i in range(self.manifest.count()):
1635 1641 n = self.manifest.node(i)
1636 1642 l = self.manifest.linkrev(n)
1637 1643
1638 1644 if l < 0 or l >= self.changelog.count():
1639 1645 err(_("bad manifest link (%d) at revision %d") % (l, i))
1640 1646
1641 1647 if n in neededmanifests:
1642 1648 del neededmanifests[n]
1643 1649
1644 1650 if n in seen:
1645 1651 err(_("duplicate manifest at revision %d") % i)
1646 1652
1647 1653 seen[n] = 1
1648 1654
1649 1655 for p in self.manifest.parents(n):
1650 1656 if p not in self.manifest.nodemap:
1651 1657 err(_("manifest %s has unknown parent %s") %
1652 1658 (short(n), short(p)))
1653 1659
1654 1660 try:
1655 1661 delta = mdiff.patchtext(self.manifest.delta(n))
1656 1662 except KeyboardInterrupt:
1657 1663 self.ui.warn(_("interrupted"))
1658 1664 raise
1659 1665 except Exception, inst:
1660 1666 err(_("unpacking manifest %s: %s") % (short(n), inst))
1661 1667
1662 1668 ff = [ l.split('\0') for l in delta.splitlines() ]
1663 1669 for f, fn in ff:
1664 1670 filenodes.setdefault(f, {})[bin(fn[:40])] = 1
1665 1671
1666 1672 self.ui.status(_("crosschecking files in changesets and manifests\n"))
1667 1673
1668 1674 for m,c in neededmanifests.items():
1669 1675 err(_("changeset %s refers to unknown manifest %s") %
1670 1676 (short(c), short(m)))
1671 1677 del neededmanifests
1672 1678
1673 1679 for f in filenodes:
1674 1680 if f not in filelinkrevs:
1675 1681 err(_("file %s in manifest but not in changesets") % f)
1676 1682
1677 1683 for f in filelinkrevs:
1678 1684 if f not in filenodes:
1679 1685 err(_("file %s in changeset but not in manifest") % f)
1680 1686
1681 1687 self.ui.status(_("checking files\n"))
1682 1688 ff = filenodes.keys()
1683 1689 ff.sort()
1684 1690 for f in ff:
1685 1691 if f == "/dev/null": continue
1686 1692 files += 1
1687 1693 fl = self.file(f)
1694 d = fl.checksize()
1695 if d:
1696 err(_("%s file data short %d bytes") % (f, d))
1697
1688 1698 nodes = { nullid: 1 }
1689 1699 seen = {}
1690 1700 for i in range(fl.count()):
1691 1701 revisions += 1
1692 1702 n = fl.node(i)
1693 1703
1694 1704 if n in seen:
1695 1705 err(_("%s: duplicate revision %d") % (f, i))
1696 1706 if n not in filenodes[f]:
1697 1707 err(_("%s: %d:%s not in manifests") % (f, i, short(n)))
1698 1708 else:
1699 1709 del filenodes[f][n]
1700 1710
1701 1711 flr = fl.linkrev(n)
1702 1712 if flr not in filelinkrevs[f]:
1703 1713 err(_("%s:%s points to unexpected changeset %d")
1704 1714 % (f, short(n), flr))
1705 1715 else:
1706 1716 filelinkrevs[f].remove(flr)
1707 1717
1708 1718 # verify contents
1709 1719 try:
1710 1720 t = fl.read(n)
1711 1721 except KeyboardInterrupt:
1712 1722 self.ui.warn(_("interrupted\n"))
1713 1723 raise
1714 1724 except Exception, inst:
1715 1725 err(_("unpacking file %s %s: %s") % (f, short(n), inst))
1716 1726
1717 1727 # verify parents
1718 1728 (p1, p2) = fl.parents(n)
1719 1729 if p1 not in nodes:
1720 1730 err(_("file %s:%s unknown parent 1 %s") %
1721 1731 (f, short(n), short(p1)))
1722 1732 if p2 not in nodes:
1723 1733 err(_("file %s:%s unknown parent 2 %s") %
1724 1734 (f, short(n), short(p2)))
1725 1735 nodes[n] = 1
1726 1736
1727 1737 # cross-check
1728 1738 for node in filenodes[f]:
1729 1739 err(_("node %s in manifests not in %s") % (hex(node), f))
1730 1740
1731 1741 self.ui.status(_("%d files, %d changesets, %d total revisions\n") %
1732 1742 (files, changesets, revisions))
1733 1743
1734 1744 if errors[0]:
1735 1745 self.ui.warn(_("%d integrity errors encountered!\n") % errors[0])
1736 1746 return 1
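The three checksize() calls added above give verify a cheap first pass: before any revision contents are walked, the data length the index promises is compared against what is actually on disk. A minimal standalone sketch of that comparison (the names here are illustrative, not Mercurial's API):

import os

def data_shortfall(expected_end, datafile):
    # expected_end plays the role of self.end(self.count() - 1): the
    # offset just past the last revision recorded in the index. A
    # positive result means the data file was truncated.
    return expected_end - os.path.getsize(datafile)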
@@ -1,825 +1,834 b''
1 1 """
2 2 revlog.py - storage back-end for mercurial
3 3
4 4 This provides efficient delta storage with O(1) retrieve and append
5 5 and O(changes) merge between branches
6 6
7 7 Copyright 2005 Matt Mackall <mpm@selenic.com>
8 8
9 9 This software may be used and distributed according to the terms
10 10 of the GNU General Public License, incorporated herein by reference.
11 11 """
12 12
13 13 from node import *
14 14 from i18n import gettext as _
15 15 from demandload import demandload
16 16 demandload(globals(), "binascii errno heapq mdiff sha struct zlib")
17 17
18 18 def hash(text, p1, p2):
19 19 """generate a hash from the given text and its parent hashes
20 20
21 21 This hash combines both the current file contents and its history
22 22 in a manner that makes it easy to distinguish nodes with the same
23 23 content in the revision graph.
24 24 """
25 25 l = [p1, p2]
26 26 l.sort()
27 27 s = sha.new(l[0])
28 28 s.update(l[1])
29 29 s.update(text)
30 30 return s.digest()
31 31
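One property of this scheme worth making explicit: because the parent hashes are sorted before being fed to SHA-1, the nodeid is symmetric in p1 and p2. A quick check, assuming the hash() function above is in scope:

p = "\x11" * 20
q = "\x22" * 20
assert hash("some text", p, q) == hash("some text", q, p)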
32 32 def compress(text):
33 33 """ generate a possibly-compressed representation of text """
34 34 if not text: return text
35 35 if len(text) < 44:
36 36 if text[0] == '\0': return text
37 37 return 'u' + text
38 38 bin = zlib.compress(text)
39 39 if len(bin) > len(text):
40 40 if text[0] == '\0': return text
41 41 return 'u' + text
42 42 return bin
43 43
44 44 def decompress(bin):
45 45 """ decompress the given input """
46 46 if not bin: return bin
47 47 t = bin[0]
48 48 if t == '\0': return bin
49 49 if t == 'x': return zlib.decompress(bin)
50 50 if t == 'u': return bin[1:]
51 51 raise RevlogError(_("unknown compression type %s") % t)
52 52
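A round-trip sketch of the marker scheme, assuming the compress() and decompress() functions above are in scope: zlib output happens to start with 'x', short or incompressible text is stored behind a 'u' marker, and empty or NUL-prefixed text passes through verbatim.

for text in ("", "short text", "\0binary", "x" * 1000):
    assert decompress(compress(text)) == text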
53 53 indexformat = ">4l20s20s20s"
54 54
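Each index record is therefore fixed-size: four big-endian 32-bit integers (offset, size, base, linkrev) followed by three 20-byte binary nodes (p1, p2, nodeid), 76 bytes in all. A sketch of packing and unpacking one entry with the indexformat defined above:

import struct

e = (0, 42, 0, 7, "\0" * 20, "\0" * 20, "\xab" * 20)
record = struct.pack(indexformat, *e)
assert struct.calcsize(indexformat) == len(record) == 76
offset, size, base, linkrev, p1, p2, node = struct.unpack(indexformat, record)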
55 55 class lazyparser:
56 56 """
57 57 this class avoids the need to parse the entirety of large indices
58 58
59 59 By default we parse and load 1000 entries at a time.
60 60
61 61 If no position is specified, we load the whole index, and replace
62 62 the lazy objects in revlog with the underlying objects for
63 63 efficiency in cases where we look at most of the nodes.
64 64 """
65 65 def __init__(self, data, revlog):
66 66 self.data = data
67 67 self.s = struct.calcsize(indexformat)
68 68 self.l = len(data)/self.s
69 69 self.index = [None] * self.l
70 70 self.map = {nullid: -1}
71 71 self.all = 0
72 72 self.revlog = revlog
73 73
74 74 def load(self, pos=None):
75 75 if self.all: return
76 76 if pos is not None:
77 77 block = pos / 1000
78 78 i = block * 1000
79 79 end = min(self.l, i + 1000)
80 80 else:
81 81 self.all = 1
82 82 i = 0
83 83 end = self.l
84 84 self.revlog.index = self.index
85 85 self.revlog.nodemap = self.map
86 86
87 87 while i < end:
88 88 d = self.data[i * self.s: (i + 1) * self.s]
89 89 e = struct.unpack(indexformat, d)
90 90 self.index[i] = e
91 91 self.map[e[6]] = i
92 92 i += 1
93 93
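The windowing arithmetic in load() is easy to get wrong, so here it is in isolation: a request for entry pos pulls in the whole 1000-entry block containing it (a standalone sketch, not part of the class):

def block_bounds(pos, total, blocksize=1000):
    start = (pos // blocksize) * blocksize
    return start, min(total, start + blocksize)

assert block_bounds(1234, 5000) == (1000, 2000)
assert block_bounds(4999, 5000) == (4000, 5000)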
94 94 class lazyindex:
95 95 """a lazy version of the index array"""
96 96 def __init__(self, parser):
97 97 self.p = parser
98 98 def __len__(self):
99 99 return len(self.p.index)
100 100 def load(self, pos):
101 101 if pos < 0:
102 102 pos += len(self.p.index)
103 103 self.p.load(pos)
104 104 return self.p.index[pos]
105 105 def __getitem__(self, pos):
106 106 return self.p.index[pos] or self.load(pos)
107 107 def append(self, e):
108 108 self.p.index.append(e)
109 109
110 110 class lazymap:
111 111 """a lazy version of the node map"""
112 112 def __init__(self, parser):
113 113 self.p = parser
114 114 def load(self, key):
115 115 if self.p.all: return
116 116 n = self.p.data.find(key)
117 117 if n < 0:
118 118 raise KeyError(key)
119 119 pos = n / self.p.s
120 120 self.p.load(pos)
121 121 def __contains__(self, key):
122 122 self.p.load()
123 123 return key in self.p.map
124 124 def __iter__(self):
125 125 yield nullid
126 126 for i in xrange(self.p.l):
127 127 try:
128 128 yield self.p.index[i][6]
129 129 except (TypeError, IndexError):
130 130 self.p.load(i)
131 131 yield self.p.index[i][6]
132 132 def __getitem__(self, key):
133 133 try:
134 134 return self.p.map[key]
135 135 except KeyError:
136 136 try:
137 137 self.load(key)
138 138 return self.p.map[key]
139 139 except KeyError:
140 140 raise KeyError("node " + hex(key))
141 141 def __setitem__(self, key, val):
142 142 self.p.map[key] = val
143 143
144 144 class RevlogError(Exception): pass
145 145
146 146 class revlog:
147 147 """
148 148 the underlying revision storage object
149 149
150 150 A revlog consists of two parts, an index and the revision data.
151 151
152 152 The index is a file with a fixed record size containing
153 153 information on each revision, including its nodeid (hash), the
154 154 nodeids of its parents, the position and offset of its data within
155 155 the data file, and the revision it's based on. Finally, each entry
156 156 contains a linkrev entry that can serve as a pointer to external
157 157 data.
158 158
159 159 The revision data itself is a linear collection of data chunks.
160 160 Each chunk represents a revision and is usually represented as a
161 161 delta against the previous chunk. To bound lookup time, runs of
162 162 deltas are limited to about 2 times the length of the original
163 163 version data. This makes retrieval of a version proportional to
164 164 its size, or O(1) relative to the number of revisions.
165 165
166 166 Both pieces of the revlog are written to in an append-only
167 167 fashion, which means we never need to rewrite a file to insert or
168 168 remove data, and can use some simple techniques to avoid the need
169 169 for locking while reading.
170 170 """
171 171 def __init__(self, opener, indexfile, datafile):
172 172 """
173 173 create a revlog object
174 174
175 175 opener is a function that abstracts the file opening operation
176 176 and can be used to implement COW semantics or the like.
177 177 """
178 178 self.indexfile = indexfile
179 179 self.datafile = datafile
180 180 self.opener = opener
181 181 self.cache = None
182 182
183 183 try:
184 184 i = self.opener(self.indexfile).read()
185 185 except IOError, inst:
186 186 if inst.errno != errno.ENOENT:
187 187 raise
188 188 i = ""
189 189
190 190 if len(i) > 10000:
191 191 # big index, let's parse it on demand
192 192 parser = lazyparser(i, self)
193 193 self.index = lazyindex(parser)
194 194 self.nodemap = lazymap(parser)
195 195 else:
196 196 s = struct.calcsize(indexformat)
197 197 l = len(i) / s
198 198 self.index = [None] * l
199 199 m = [None] * l
200 200
201 201 n = 0
202 202 for f in xrange(0, len(i), s):
203 203 # offset, size, base, linkrev, p1, p2, nodeid
204 204 e = struct.unpack(indexformat, i[f:f + s])
205 205 m[n] = (e[6], n)
206 206 self.index[n] = e
207 207 n += 1
208 208
209 209 self.nodemap = dict(m)
210 210 self.nodemap[nullid] = -1
211 211
212 212 def tip(self): return self.node(len(self.index) - 1)
213 213 def count(self): return len(self.index)
214 214 def node(self, rev): return (rev < 0) and nullid or self.index[rev][6]
215 215 def rev(self, node):
216 216 try:
217 217 return self.nodemap[node]
218 218 except KeyError:
219 219 raise RevlogError(_('%s: no node %s') % (self.indexfile, hex(node)))
220 220 def linkrev(self, node): return self.index[self.rev(node)][3]
221 221 def parents(self, node):
222 222 if node == nullid: return (nullid, nullid)
223 223 return self.index[self.rev(node)][4:6]
224 224
225 225 def start(self, rev): return self.index[rev][0]
226 226 def length(self, rev): return self.index[rev][1]
227 227 def end(self, rev): return self.start(rev) + self.length(rev)
228 228 def base(self, rev): return self.index[rev][2]
229 229
230 230 def reachable(self, rev, stop=None):
231 231 reachable = {}
232 232 visit = [rev]
233 233 reachable[rev] = 1
234 234 if stop:
235 235 stopn = self.rev(stop)
236 236 else:
237 237 stopn = 0
238 238 while visit:
239 239 n = visit.pop(0)
240 240 if n == stop:
241 241 continue
242 242 if n == nullid:
243 243 continue
244 244 for p in self.parents(n):
245 245 if self.rev(p) < stopn:
246 246 continue
247 247 if p not in reachable:
248 248 reachable[p] = 1
249 249 visit.append(p)
250 250 return reachable
251 251
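reachable() is a plain breadth-first walk toward the roots (ignoring the stop parameter). The same idea over a bare parent map, as a self-contained sketch where None stands in for nullid:

def reachable_from(n, parents_of):
    seen = {n: 1}
    visit = [n]
    while visit:
        x = visit.pop(0)
        for p in parents_of(x):
            if p is not None and p not in seen:
                seen[p] = 1
                visit.append(p)
    return seen

parents = {0: (None, None), 1: (0, None), 2: (1, None)}  # 2 -> 1 -> 0
assert sorted(reachable_from(2, lambda x: parents[x])) == [0, 1, 2]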
252 252 def nodesbetween(self, roots=None, heads=None):
253 253 """Return a tuple containing three elements. Elements 1 and 2 contain
254 254 a final list of bases and heads after all the unreachable ones have been
255 255 pruned. Element 0 contains a topologically sorted list of all
256 256 nodes that satisfy these constraints:
257 257
258 258 1. All nodes must be descended from a node in roots (the nodes on
259 259 roots are considered descended from themselves).
260 260 2. All nodes must also be ancestors of a node in heads (the nodes in
261 261 heads are considered to be their own ancestors).
262 262
263 263 If roots is unspecified, nullid is assumed as the only root.
264 264 If heads is unspecified, it is taken to be the output of the
265 265 heads method (i.e. a list of all nodes in the repository that
266 266 have no children)."""
267 267 nonodes = ([], [], [])
268 268 if roots is not None:
269 269 roots = list(roots)
270 270 if not roots:
271 271 return nonodes
272 272 lowestrev = min([self.rev(n) for n in roots])
273 273 else:
274 274 roots = [nullid] # Everybody's a descendent of nullid
275 275 lowestrev = -1
276 276 if (lowestrev == -1) and (heads is None):
277 277 # We want _all_ the nodes!
278 278 return ([self.node(r) for r in xrange(0, self.count())],
279 279 [nullid], list(self.heads()))
280 280 if heads is None:
281 281 # All nodes are ancestors, so the latest ancestor is the last
282 282 # node.
283 283 highestrev = self.count() - 1
284 284 # Set ancestors to None to signal that every node is an ancestor.
285 285 ancestors = None
286 286 # Set heads to an empty dictionary for later discovery of heads
287 287 heads = {}
288 288 else:
289 289 heads = list(heads)
290 290 if not heads:
291 291 return nonodes
292 292 ancestors = {}
293 293 # Start at the top and keep marking parents until we're done.
294 294 nodestotag = heads[:]
295 295 # Turn heads into a dictionary so we can remove 'fake' heads.
296 296 # Also, later we will be using it to filter out the heads we can't
297 297 # find from roots.
298 298 heads = dict.fromkeys(heads, 0)
299 299 # Remember where the top was so we can use it as a limit later.
300 300 highestrev = max([self.rev(n) for n in nodestotag])
301 301 while nodestotag:
302 302 # grab a node to tag
303 303 n = nodestotag.pop()
304 304 # Never tag nullid
305 305 if n == nullid:
306 306 continue
307 307 # A node's revision number represents its place in a
308 308 # topologically sorted list of nodes.
309 309 r = self.rev(n)
310 310 if r >= lowestrev:
311 311 if n not in ancestors:
312 312 # If we are possibly a descendent of one of the roots
313 313 # and we haven't already been marked as an ancestor
314 314 ancestors[n] = 1 # Mark as ancestor
315 315 # Add non-nullid parents to list of nodes to tag.
316 316 nodestotag.extend([p for p in self.parents(n) if
317 317 p != nullid])
318 318 elif n in heads: # We've seen it before, is it a fake head?
319 319 # So it is, real heads should not be the ancestors of
320 320 # any other heads.
321 321 heads.pop(n)
322 322 if not ancestors:
323 323 return nonodes
324 324 # Now that we have our set of ancestors, we want to remove any
325 325 # roots that are not ancestors.
326 326
327 327 # If one of the roots was nullid, everything is included anyway.
328 328 if lowestrev > -1:
329 329 # But, since we weren't, let's recompute the lowest rev to not
330 330 # include roots that aren't ancestors.
331 331
332 332 # Filter out roots that aren't ancestors of heads
333 333 roots = [n for n in roots if n in ancestors]
334 334 # Recompute the lowest revision
335 335 if roots:
336 336 lowestrev = min([self.rev(n) for n in roots])
337 337 else:
338 338 # No more roots? Return empty list
339 339 return nonodes
340 340 else:
341 341 # We are descending from nullid, and don't need to care about
342 342 # any other roots.
343 343 lowestrev = -1
344 344 roots = [nullid]
345 345 # Transform our roots list into a 'set' (i.e. a dictionary where the
346 346 # values don't matter).
347 347 descendents = dict.fromkeys(roots, 1)
348 348 # Also, keep the original roots so we can filter out roots that aren't
349 349 # 'real' roots (i.e. are descended from other roots).
350 350 roots = descendents.copy()
351 351 # Our topologically sorted list of output nodes.
352 352 orderedout = []
353 353 # Don't start at nullid since we don't want nullid in our output list,
354 354 # and if nullid shows up in descendents, empty parents will look like
355 355 # they're descendents.
356 356 for r in xrange(max(lowestrev, 0), highestrev + 1):
357 357 n = self.node(r)
358 358 isdescendent = False
359 359 if lowestrev == -1: # Everybody is a descendent of nullid
360 360 isdescendent = True
361 361 elif n in descendents:
362 362 # n is already a descendent
363 363 isdescendent = True
364 364 # This check only needs to be done here because all the roots
365 365 # will start being marked as descendents before the loop.
366 366 if n in roots:
367 367 # If n was a root, check if it's a 'real' root.
368 368 p = tuple(self.parents(n))
369 369 # If any of its parents are descendents, it's not a root.
370 370 if (p[0] in descendents) or (p[1] in descendents):
371 371 roots.pop(n)
372 372 else:
373 373 p = tuple(self.parents(n))
374 374 # A node is a descendent if either of its parents are
375 375 # descendents. (We seeded the descendents list with the roots
376 376 # up there, remember?)
377 377 if (p[0] in descendents) or (p[1] in descendents):
378 378 descendents[n] = 1
379 379 isdescendent = True
380 380 if isdescendent and ((ancestors is None) or (n in ancestors)):
381 381 # Only include nodes that are both descendents and ancestors.
382 382 orderedout.append(n)
383 383 if (ancestors is not None) and (n in heads):
384 384 # We're trying to figure out which heads are reachable
385 385 # from roots.
386 386 # Mark this head as having been reached
387 387 heads[n] = 1
388 388 elif ancestors is None:
389 389 # Otherwise, we're trying to discover the heads.
390 390 # Assume this is a head because if it isn't, the next step
391 391 # will eventually remove it.
392 392 heads[n] = 1
393 393 # But, obviously its parents aren't.
394 394 for p in self.parents(n):
395 395 heads.pop(p, None)
396 396 heads = [n for n in heads.iterkeys() if heads[n] != 0]
397 397 roots = roots.keys()
398 398 assert orderedout
399 399 assert roots
400 400 assert heads
401 401 return (orderedout, roots, heads)
402 402
403 403 def heads(self, stop=None):
404 404 """return the list of all nodes that have no children"""
405 405 p = {}
406 406 h = []
407 407 stoprev = 0
408 408 if stop and stop in self.nodemap:
409 409 stoprev = self.rev(stop)
410 410
411 411 for r in range(self.count() - 1, -1, -1):
412 412 n = self.node(r)
413 413 if n not in p:
414 414 h.append(n)
415 415 if n == stop:
416 416 break
417 417 if r < stoprev:
418 418 break
419 419 for pn in self.parents(n):
420 420 p[pn] = 1
421 421 return h
422 422
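The walk above leans on revision numbers being topologically sorted: scanning from tip to root, any node not yet seen as someone's parent is a head. A self-contained sketch of that invariant:

def find_heads(revs_newest_first, parents_of):
    is_parent = {}
    heads = []
    for n in revs_newest_first:
        if n not in is_parent:
            heads.append(n)
        for p in parents_of(n):
            is_parent[p] = 1
    return heads

parents = {0: (), 1: (0,), 2: (0,)}       # two branches off revision 0
assert find_heads([2, 1, 0], lambda n: parents[n]) == [2, 1]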
423 423 def children(self, node):
424 424 """find the children of a given node"""
425 425 c = []
426 426 p = self.rev(node)
427 427 for r in range(p + 1, self.count()):
428 428 n = self.node(r)
429 429 for pn in self.parents(n):
430 430 if pn == node:
431 431 c.append(n)
432 432 continue
433 433 elif pn == nullid:
434 434 continue
435 435 return c
436 436
437 437 def lookup(self, id):
438 438 """locate a node based on revision number or subset of hex nodeid"""
439 439 try:
440 440 rev = int(id)
441 441 if str(rev) != id: raise ValueError
442 442 if rev < 0: rev = self.count() + rev
443 443 if rev < 0 or rev >= self.count(): raise ValueError
444 444 return self.node(rev)
445 445 except (ValueError, OverflowError):
446 446 c = []
447 447 for n in self.nodemap:
448 448 if hex(n).startswith(id):
449 449 c.append(n)
450 450 if len(c) > 1: raise RevlogError(_("Ambiguous identifier"))
451 451 if len(c) < 1: raise RevlogError(_("No match found"))
452 452 return c[0]
453 453
454 454 return None
455 455
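lookup() accepts two spellings of the same thing. A sketch of just the dispatch (a hypothetical helper, not part of the class): a string of digits is a revision number, negative numbers count back from the tip, and anything else is matched as a hex-nodeid prefix.

def parse_id(id, count):
    try:
        rev = int(id)
        if str(rev) != id:
            raise ValueError
        if rev < 0:
            rev += count
        return ("rev", rev)
    except (ValueError, OverflowError):
        return ("prefix", id)

assert parse_id("42", 100) == ("rev", 42)
assert parse_id("-1", 100) == ("rev", 99)
assert parse_id("1a216cb4", 100) == ("prefix", "1a216cb4")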
456 456 def diff(self, a, b):
457 457 """return a delta between two revisions"""
458 458 return mdiff.textdiff(a, b)
459 459
460 460 def patches(self, t, pl):
461 461 """apply a list of patches to a string"""
462 462 return mdiff.patches(t, pl)
463 463
464 464 def delta(self, node):
465 465 """return or calculate a delta between a node and its predecessor"""
466 466 r = self.rev(node)
467 467 b = self.base(r)
468 468 if r == b:
469 469 return self.diff(self.revision(self.node(r - 1)),
470 470 self.revision(node))
471 471 else:
472 472 f = self.opener(self.datafile)
473 473 f.seek(self.start(r))
474 474 data = f.read(self.length(r))
475 475 return decompress(data)
476 476
477 477 def revision(self, node):
478 478 """return an uncompressed revision of a given"""
479 479 if node == nullid: return ""
480 480 if self.cache and self.cache[0] == node: return self.cache[2]
481 481
482 482 # look up what we need to read
483 483 text = None
484 484 rev = self.rev(node)
485 485 start, length, base, link, p1, p2, node = self.index[rev]
486 486 end = start + length
487 487 if base != rev: start = self.start(base)
488 488
489 489 # do we have useful data cached?
490 490 if self.cache and self.cache[1] >= base and self.cache[1] < rev:
491 491 base = self.cache[1]
492 492 start = self.start(base + 1)
493 493 text = self.cache[2]
494 494 last = 0
495 495
496 496 f = self.opener(self.datafile)
497 497 f.seek(start)
498 498 data = f.read(end - start)
499 499
500 500 if text is None:
501 501 last = self.length(base)
502 502 text = decompress(data[:last])
503 503
504 504 bins = []
505 505 for r in xrange(base + 1, rev + 1):
506 506 s = self.length(r)
507 507 bins.append(decompress(data[last:last + s]))
508 508 last = last + s
509 509
510 510 text = mdiff.patches(text, bins)
511 511
512 512 if node != hash(text, p1, p2):
513 513 raise RevlogError(_("integrity check failed on %s:%d")
514 514 % (self.datafile, rev))
515 515
516 516 self.cache = (node, rev, text)
517 517 return text
518 518
519 519 def addrevision(self, text, transaction, link, p1=None, p2=None, d=None):
520 520 """add a revision to the log
521 521
522 522 text - the revision data to add
523 523 transaction - the transaction object used for rollback
524 524 link - the linkrev data to add
525 525 p1, p2 - the parent nodeids of the revision
526 526 d - an optional precomputed delta
527 527 """
528 528 if text is None: text = ""
529 529 if p1 is None: p1 = self.tip()
530 530 if p2 is None: p2 = nullid
531 531
532 532 node = hash(text, p1, p2)
533 533
534 534 if node in self.nodemap:
535 535 return node
536 536
537 537 n = self.count()
538 538 t = n - 1
539 539
540 540 if n:
541 541 base = self.base(t)
542 542 start = self.start(base)
543 543 end = self.end(t)
544 544 if not d:
545 545 prev = self.revision(self.tip())
546 546 d = self.diff(prev, text)
547 547 data = compress(d)
548 548 dist = end - start + len(data)
549 549
550 550 # full versions are inserted when the needed deltas
551 551 # become comparable to the uncompressed text
552 552 if not n or dist > len(text) * 2:
553 553 data = compress(text)
554 554 base = n
555 555 else:
556 556 base = self.base(t)
557 557
558 558 offset = 0
559 559 if t >= 0:
560 560 offset = self.end(t)
561 561
562 562 e = (offset, len(data), base, link, p1, p2, node)
563 563
564 564 self.index.append(e)
565 565 self.nodemap[node] = n
566 566 entry = struct.pack(indexformat, *e)
567 567
568 568 transaction.add(self.datafile, e[0])
569 569 self.opener(self.datafile, "a").write(data)
570 570 transaction.add(self.indexfile, n * len(entry))
571 571 self.opener(self.indexfile, "a").write(entry)
572 572
573 573 self.cache = (node, n, text)
574 574 return node
575 575
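The cutoff in addrevision() is what bounds delta chains to roughly twice the text size: a new delta is only appended while the bytes needed for reconstruction (the full base version plus every delta since, plus the new one) stay under 2x the new text. The rule in isolation, a sketch with assumed names:

def needs_full_version(chain_start, chain_end, delta_len, text_len):
    dist = chain_end - chain_start + delta_len
    return dist > 2 * text_len

assert not needs_full_version(0, 100, 30, 100)   # 130 <= 200: append delta
assert needs_full_version(0, 100, 150, 100)      # 250 > 200: store full text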
576 576 def ancestor(self, a, b):
577 577 """calculate the least common ancestor of nodes a and b"""
578 578 # calculate the distance of every node from root
579 579 dist = {nullid: 0}
580 580 for i in xrange(self.count()):
581 581 n = self.node(i)
582 582 p1, p2 = self.parents(n)
583 583 dist[n] = max(dist[p1], dist[p2]) + 1
584 584
585 585 # traverse ancestors in order of decreasing distance from root
586 586 def ancestors(node):
587 587 # we store negative distances because heap returns smallest member
588 588 h = [(-dist[node], node)]
589 589 seen = {}
590 590 earliest = self.count()
591 591 while h:
592 592 d, n = heapq.heappop(h)
593 593 if n not in seen:
594 594 seen[n] = 1
595 595 r = self.rev(n)
596 596 yield (-d, n)
597 597 for p in self.parents(n):
598 598 heapq.heappush(h, (-dist[p], p))
599 599
600 600 def generations(node):
601 601 sg, s = None, {}
602 602 for g,n in ancestors(node):
603 603 if g != sg:
604 604 if sg:
605 605 yield sg, s
606 606 sg, s = g, {n:1}
607 607 else:
608 608 s[n] = 1
609 609 yield sg, s
610 610
611 611 x = generations(a)
612 612 y = generations(b)
613 613 gx = x.next()
614 614 gy = y.next()
615 615
616 616 # increment each ancestor list until it is closer to root than
617 617 # the other, or they match
618 618 while 1:
619 619 #print "ancestor gen %s %s" % (gx[0], gy[0])
620 620 if gx[0] == gy[0]:
621 621 # find the intersection
622 622 i = [ n for n in gx[1] if n in gy[1] ]
623 623 if i:
624 624 return i[0]
625 625 else:
626 626 #print "next"
627 627 gy = y.next()
628 628 gx = x.next()
629 629 elif gx[0] < gy[0]:
630 630 #print "next y"
631 631 gy = y.next()
632 632 else:
633 633 #print "next x"
634 634 gx = x.next()
635 635
636 636 def group(self, nodelist, lookup, infocollect = None):
637 637 """calculate a delta group
638 638
639 639 Given a list of changeset revs, return a set of deltas and
640 640 metadata corresponding to nodes. The first delta is
641 641 parent(nodes[0]) -> nodes[0]; the receiver is guaranteed to
642 642 have this parent as it has all history before these
643 643 changesets. The parent used is parents(nodes[0])[0].
644 644 """
645 645 revs = [self.rev(n) for n in nodelist]
646 646 needed = dict.fromkeys(revs, 1)
647 647
648 648 # if we don't have any revisions touched by these changesets, bail
649 649 if not revs:
650 650 yield struct.pack(">l", 0)
651 651 return
652 652
653 653 # add the parent of the first rev
654 654 p = self.parents(self.node(revs[0]))[0]
655 655 revs.insert(0, self.rev(p))
656 656
657 657 # for each delta that isn't contiguous in the log, we need to
658 658 # reconstruct the base, reconstruct the result, and then
659 659 # calculate the delta. We also need to do this where we've
660 660 # stored a full version and not a delta
661 661 for i in xrange(0, len(revs) - 1):
662 662 a, b = revs[i], revs[i + 1]
663 663 if a + 1 != b or self.base(b) == b:
664 664 for j in xrange(self.base(a), a + 1):
665 665 needed[j] = 1
666 666 for j in xrange(self.base(b), b + 1):
667 667 needed[j] = 1
668 668
669 669 # calculate spans to retrieve from datafile
670 670 needed = needed.keys()
671 671 needed.sort()
672 672 spans = []
673 673 oo = -1
674 674 ol = 0
675 675 for n in needed:
676 676 if n < 0: continue
677 677 o = self.start(n)
678 678 l = self.length(n)
679 679 if oo + ol == o: # can we merge with the previous?
680 680 nl = spans[-1][2]
681 681 nl.append((n, l))
682 682 ol += l
683 683 spans[-1] = (oo, ol, nl)
684 684 else:
685 685 oo = o
686 686 ol = l
687 687 spans.append((oo, ol, [(n, l)]))
688 688
689 689 # read spans in, divide up chunks
690 690 chunks = {}
691 691 for span in spans:
692 692 # we reopen the file for each span to make http happy for now
693 693 f = self.opener(self.datafile)
694 694 f.seek(span[0])
695 695 data = f.read(span[1])
696 696
697 697 # divide up the span
698 698 pos = 0
699 699 for r, l in span[2]:
700 700 chunks[r] = decompress(data[pos: pos + l])
701 701 pos += l
702 702
703 703 # helper to reconstruct intermediate versions
704 704 def construct(text, base, rev):
705 705 bins = [chunks[r] for r in xrange(base + 1, rev + 1)]
706 706 return mdiff.patches(text, bins)
707 707
708 708 # build deltas
709 709 deltas = []
710 710 for d in xrange(0, len(revs) - 1):
711 711 a, b = revs[d], revs[d + 1]
712 712 n = self.node(b)
713 713
714 714 if infocollect is not None:
715 715 infocollect(n)
716 716
717 717 # do we need to construct a new delta?
718 718 if a + 1 != b or self.base(b) == b:
719 719 if a >= 0:
720 720 base = self.base(a)
721 721 ta = chunks[self.base(a)]
722 722 ta = construct(ta, base, a)
723 723 else:
724 724 ta = ""
725 725
726 726 base = self.base(b)
727 727 if a > base:
728 728 base = a
729 729 tb = ta
730 730 else:
731 731 tb = chunks[self.base(b)]
732 732 tb = construct(tb, base, b)
733 733 d = self.diff(ta, tb)
734 734 else:
735 735 d = chunks[b]
736 736
737 737 p = self.parents(n)
738 738 meta = n + p[0] + p[1] + lookup(n)
739 739 l = struct.pack(">l", len(meta) + len(d) + 4)
740 740 yield l
741 741 yield meta
742 742 yield d
743 743
744 744 yield struct.pack(">l", 0)
745 745
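On the wire, a delta group is a sequence of self-delimiting chunks: a 4-byte big-endian length (which counts itself), 80 bytes of metadata (node, p1, p2, and the changeset id from lookup()), then the delta; a bare zero length terminates the group. A sketch of framing one chunk:

import struct

def frame(node, p1, p2, cs, delta):
    meta = node + p1 + p2 + cs            # 4 x 20 bytes
    return struct.pack(">l", 4 + len(meta) + len(delta)) + meta + delta

chunk = frame("\1" * 20, "\2" * 20, "\3" * 20, "\4" * 20, "a delta")
assert struct.unpack(">l", chunk[:4])[0] == len(chunk)
terminator = struct.pack(">l", 0)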
746 746 def addgroup(self, revs, linkmapper, transaction, unique=0):
747 747 """
748 748 add a delta group
749 749
750 750 given a set of deltas, add them to the revision log. the
751 751 first delta is against its parent, which should be in our
752 752 log, the rest are against the previous delta.
753 753 """
754 754
755 755 # track the base of the current delta log
756 756 r = self.count()
757 757 t = r - 1
758 758 node = nullid
759 759
760 760 base = prev = -1
761 761 start = end = measure = 0
762 762 if r:
763 763 start = self.start(self.base(t))
764 764 end = self.end(t)
765 765 measure = self.length(self.base(t))
766 766 base = self.base(t)
767 767 prev = self.tip()
768 768
769 769 transaction.add(self.datafile, end)
770 770 transaction.add(self.indexfile, r * struct.calcsize(indexformat))
771 771 dfh = self.opener(self.datafile, "a")
772 772 ifh = self.opener(self.indexfile, "a")
773 773
774 774 # loop through our set of deltas
775 775 chain = None
776 776 for chunk in revs:
777 777 node, p1, p2, cs = struct.unpack("20s20s20s20s", chunk[:80])
778 778 link = linkmapper(cs)
779 779 if node in self.nodemap:
780 780 # this can happen if two branches make the same change
781 781 # if unique:
782 782 # raise RevlogError(_("already have %s") % hex(node[:4]))
783 783 chain = node
784 784 continue
785 785 delta = chunk[80:]
786 786
787 787 if not chain:
788 788 # retrieve the parent revision of the delta chain
789 789 chain = p1
790 790 if chain not in self.nodemap:
791 791 raise RevlogError(_("unknown base %s") % short(chain))
792 792
793 793 # full versions are inserted when the needed deltas become
794 794 # comparable to the uncompressed text or when the previous
795 795 # version is not the one we have a delta against. We use
796 796 # the size of the previous full rev as a proxy for the
797 797 # current size.
798 798
799 799 if chain == prev:
800 800 cdelta = compress(delta)
801 801
802 802 if chain != prev or (end - start + len(cdelta)) > measure * 2:
803 803 # flush our writes here so we can read it in revision
804 804 dfh.flush()
805 805 ifh.flush()
806 806 text = self.revision(chain)
807 807 text = self.patches(text, [delta])
808 808 chk = self.addrevision(text, transaction, link, p1, p2)
809 809 if chk != node:
810 810 raise RevlogError(_("consistency error adding group"))
811 811 measure = len(text)
812 812 else:
813 813 e = (end, len(cdelta), self.base(t), link, p1, p2, node)
814 814 self.index.append(e)
815 815 self.nodemap[node] = r
816 816 dfh.write(cdelta)
817 817 ifh.write(struct.pack(indexformat, *e))
818 818
819 819 t, r, chain, prev = r, r + 1, node, node
820 820 start = self.start(self.base(t))
821 821 end = self.end(t)
822 822
823 823 dfh.close()
824 824 ifh.close()
825 825 return node
826
827 def checksize(self):
828 expected = 0
829 if self.count():
830 expected = self.end(self.count() - 1)
831 f = self.opener(self.datafile)
832 f.seek(0, 2)
833 actual = f.tell()
834 return expected - actual
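checksize() is the helper the new verify checks above call: expected is the end offset of the last index entry, actual is the data file's size on disk, so a positive return value means the data file is short that many bytes, which verify reports as "data short %d bytes". (If the data file is missing entirely, the opener raises IOError instead of returning a shortfall.) A quick way to exercise it by hand, sketched as comments with hypothetical paths:

#   rl = revlog(util.opener("/repo/.hg"), "00changelog.i", "00changelog.d")
#   assert rl.checksize() == 0     # index and data agree
#   # truncate 00changelog.d by one byte, then reopen and recheck:
#   rl = revlog(util.opener("/repo/.hg"), "00changelog.i", "00changelog.d")
#   assert rl.checksize() == 1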