Mercurial changeset r98:3dde7c87 (branch: default) by Matt Mackall <mpm@selenic.com>:
"Add paranoia to diff code"
First hunk: mercurial/hg.py (@@ -1,862 +1,866 @'')
1 1 # hg.py - repository classes for mercurial
2 2 #
3 3 # Copyright 2005 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms
6 6 # of the GNU General Public License, incorporated herein by reference.
7 7
8 8 import sys, struct, sha, socket, os, time, base64, re, urllib2
9 9 import urllib
10 10 from mercurial import byterange
11 11 from mercurial.transaction import *
12 12 from mercurial.revlog import *
13 13 from difflib import SequenceMatcher
14 14
class filelog(revlog):
    """Revlog holding the history of one tracked file.

    The store name is derived from a hash of the tracked path (see
    encodepath) and kept under "data/", with the "i" suffix for the
    index and no suffix for the data file.
    """

    def __init__(self, opener, path):
        s = self.encodepath(path)
        revlog.__init__(self, opener, os.path.join("data", s + "i"),
                        os.path.join("data", s))

    def encodepath(self, path):
        """Map an arbitrary path to a safe, fixed-length store name."""
        s = sha.sha(path).digest()
        s = base64.encodestring(s)[:-3]
        # base64 output may contain "+" and "/", which are awkward in
        # filenames; plain str.replace is simpler (and faster) than the
        # regex substitutions used previously
        s = s.replace("+", "%").replace("/", "_")
        return s

    def read(self, node):
        """Return the file text of revision *node*."""
        return self.revision(node)

    def add(self, text, transaction, link, p1=None, p2=None):
        """Store a new revision of the file; returns its nodeid."""
        return self.addrevision(text, transaction, link, p1, p2)

    def annotate(self, node):
        """Return [(linkrev, line)] pairs attributing each line of *node*
        to the changeset (linkrev) that introduced it."""
        # collect the first-parent ancestry, oldest first
        revs = []
        while node != nullid:
            revs.append(node)
            node = self.parents(node)[0]
        revs.reverse()
        prev = []
        annotate = []
        for node in revs:
            curr = self.read(node).splitlines(1)
            linkrev = self.linkrev(node)
            sm = SequenceMatcher(None, prev, curr)
            offset = 0
            # replay each revision's edits onto the annotation list;
            # opcodes are (tag, prev-start, prev-end, curr-start, curr-end)
            for o, m, n, s, t in sm.get_opcodes():
                if o in ('insert', 'replace'):
                    annotate[m+offset:n+offset] = \
                        [ (linkrev, l) for l in curr[s:t]]
                    if o == 'insert':
                        offset += m-n
                elif o == 'delete':
                    del annotate[m+offset:n+offset]
                    offset -= m-n
            assert len(annotate) == len(curr)
            prev = curr
        return annotate
58 58
class manifest(revlog):
    """Revlog of manifest texts: one "path\\0hexnode\\n" line per file.

    Caches both the parsed dict (mapcache) and the raw line list
    (listcache) of the most recently read/written manifest so that
    diff() can work on pre-split lines.
    """

    def __init__(self, opener):
        self.mapcache = None
        self.listcache = None
        self.addlist = None
        revlog.__init__(self, opener, "00manifest.i", "00manifest.d")

    def read(self, node):
        """Return {path: binary nodeid} for manifest revision *node*."""
        if self.mapcache and self.mapcache[0] == node:
            return self.mapcache[1].copy()
        text = self.revision(node)
        mapping = {}
        lines = text.splitlines(1)
        self.listcache = (text, lines)
        for line in lines:
            (name, n) = line.split('\0')
            mapping[name] = bin(n[:40])
        self.mapcache = (node, mapping)
        return mapping

    def diff(self, a, b):
        # sneaky: when the cache matches, diff the cached line lists
        # instead of the texts a and b we were handed
        if not self.listcache or len(self.listcache[0]) != len(a):
            return mdiff.textdiff(a, b)
        d = mdiff.diff(self.listcache[1], self.addlist, 1)
        # paranoia: verify the fast diff actually patches a into b,
        # and fall back to a plain text diff if it does not
        if mdiff.patch(a, d) != b:
            sys.stderr.write("*** sortdiff failed, falling back ***\n")
            return mdiff.textdiff(a, b)
        return d

    def add(self, map, transaction, link, p1=None, p2=None):
        """Store the manifest dict *map* as a new revision; returns its
        nodeid and refreshes both caches."""
        names = map.keys()
        names.sort()

        self.addlist = ["%s\000%s\n" % (f, hex(map[f])) for f in names]
        text = "".join(self.addlist)

        n = self.addrevision(text, transaction, link, p1, p2)
        self.mapcache = (n, map)
        self.listcache = (text, self.addlist)

        return n
97 101
class changelog(revlog):
    """Revlog of changeset entries.

    Entry format: manifest hex node, user, date, changed files (one per
    line), a blank line, then the free-form description.
    """

    def __init__(self, opener):
        revlog.__init__(self, opener, "00changelog.i", "00changelog.d")

    def extract(self, text):
        """Parse a changeset text into (manifest, user, date, files, desc)."""
        if not text:
            return (nullid, "", "0", [], "")
        last = text.index("\n\n")
        desc = text[last + 2:]
        header = text[:last].splitlines()
        return (bin(header[0]), header[1], header[2], header[3:], desc)

    def read(self, node):
        """Return the parsed changeset for *node* (see extract)."""
        return self.extract(self.revision(node))

    def add(self, manifest, list, desc, transaction, p1=None, p2=None):
        """Commit a new changeset; user comes from HGUSER/EMAIL/LOGNAME."""
        user = (os.environ.get("HGUSER") or
                os.environ.get("EMAIL") or
                os.environ.get("LOGNAME", "unknown") + '@' + socket.getfqdn())
        # date is "seconds-since-epoch timezone-offset"
        date = "%d %d" % (time.time(), time.timezone)
        list.sort()
        entry = [hex(manifest), user, date] + list + ["", desc]
        return self.addrevision("\n".join(entry), transaction,
                                self.count(), p1, p2)
126 130
class dircache:
    """Cache of (mode, size, mtime) stat data for working-directory files,
    persisted in the "dircache" file so status checks can skip files that
    have not changed."""

    def __init__(self, opener, ui):
        self.opener = opener
        self.dirty = 0
        self.ui = ui
        self.map = None      # lazily loaded: None until read()

    def __del__(self):
        if self.dirty: self.write()

    def __getitem__(self, key):
        try:
            return self.map[key]
        except TypeError:
            # map is still None: load it, then retry the lookup
            self.read()
            return self[key]

    def read(self):
        """Load the on-disk cache (idempotent)."""
        if self.map is not None: return self.map

        self.map = {}
        try:
            st = self.opener("dircache").read()
        except IOError:
            # no cache file yet; fix: was a bare except that hid all errors
            return

        # records are: >llll (mode, size, mtime, namelen) + name
        pos = 0
        while pos < len(st):
            e = struct.unpack(">llll", st[pos:pos+16])
            l = e[3]
            pos += 16
            f = st[pos:pos + l]
            self.map[f] = e[:3]
            pos += l

    def update(self, files):
        """Refresh cached stat data for *files* from the filesystem."""
        if not files: return
        self.read()
        self.dirty = 1
        for f in files:
            try:
                s = os.stat(f)
                self.map[f] = (s.st_mode, s.st_size, s.st_mtime)
            except (IOError, OSError):
                # fix: os.stat raises OSError (the old "except IOError"
                # never fired), and remove() wants a list, not one name
                # (a bare string was iterated character by character)
                self.remove([f])

    def taint(self, files):
        """Mark *files* as needing a content compare on next status."""
        if not files: return
        self.read()
        self.dirty = 1
        for f in files:
            self.map[f] = (0, -1, 0)

    def remove(self, files):
        """Drop *files* (a list) from the cache."""
        if not files: return
        self.read()
        self.dirty = 1
        for f in files:
            try:
                del self.map[f]
            except KeyError:
                self.ui.warn("Not in dircache: %s\n" % f)

    def clear(self):
        self.map = {}
        self.dirty = 1

    def write(self):
        """Persist the cache to the "dircache" file."""
        st = self.opener("dircache", "w")
        for f, e in self.map.items():
            e = struct.pack(">llll", e[0], e[1], e[2], len(f))
            st.write(e + f)
        self.dirty = 0

    def copy(self):
        """Return a shallow copy of the (loaded) cache dict."""
        self.read()
        return self.map.copy()
202 206
203 207 # used to avoid circular references so destructors work
def opener(base):
    """Return a file-opening closure rooted at *base*.

    A closure is used instead of a repository method to avoid reference
    cycles that would keep destructors from running.  For http:// bases
    the opener returns byte-range readers instead of real files.
    """
    p = base
    def o(path, mode="r"):
        if p[:7] == "http://":
            f = os.path.join(p, urllib.quote(path))
            return httprangereader(f)

        f = os.path.join(p, path)

        if mode != "r" and os.path.isfile(f):
            s = os.stat(f)
            if s.st_nlink > 1:
                # break hard links before writing (copy-on-write);
                # fix: use open() instead of the removed file() builtin
                # and close both handles promptly instead of leaking them
                src = open(f)
                data = src.read()
                src.close()
                tmp = open(f + ".tmp", "w")
                tmp.write(data)
                tmp.close()
                os.rename(f + ".tmp", f)

        return open(f, mode)

    return o
222 226
class localrepository:
    """Repository accessed through the local filesystem (or read-only
    over plain HTTP byte-range requests).

    Ties together the changelog, the manifest, the per-file revlogs,
    and the dircache tracking working-directory state.
    """

    def __init__(self, ui, path=None, create=0):
        self.remote = 0
        if path and path[:7] == "http://":
            self.remote = 1
            self.path = path
        else:
            if not path:
                # walk upward from the cwd until a .hg directory is found
                p = os.getcwd()
                while not os.path.isdir(os.path.join(p, ".hg")):
                    p = os.path.dirname(p)
                    # fix: raise a real exception instead of a
                    # (long-deprecated) string exception
                    if p == "/":
                        raise Exception("No repo found")
                path = p
            self.path = os.path.join(path, ".hg")

        self.root = path
        self.ui = ui

        if create:
            os.mkdir(self.path)
            os.mkdir(self.join("data"))

        self.opener = opener(self.path)
        self.manifest = manifest(self.opener)
        self.changelog = changelog(self.opener)
        self.ignorelist = None
        self.tags = None

        if not self.remote:
            self.dircache = dircache(self.opener, ui)
            try:
                self.current = bin(self.opener("current").read())
            except IOError:
                self.current = None

    def setcurrent(self, node):
        """Record *node* as the working directory's base changeset."""
        self.current = node
        self.opener("current", "w").write(hex(node))

    def ignore(self, f):
        """True if a .hgignore pattern matches file *f*."""
        if self.ignorelist is None:
            self.ignorelist = []
            try:
                l = open(os.path.join(self.root, ".hgignore"))
                for pat in l:
                    if pat != "\n":
                        self.ignorelist.append(re.compile(pat[:-1]))
            except IOError: pass  # no ignore file
        for pat in self.ignorelist:
            if pat.search(f): return True
        return False

    def lookup(self, key):
        """Resolve a tag name (from .hgtags) or changelog key to a node."""
        if self.tags is None:
            self.tags = {}
            try:
                fl = self.file(".hgtags")
                for l in fl.revision(fl.tip()).splitlines():
                    if l:
                        n, k = l.split(" ")
                        self.tags[k] = bin(n)
            except KeyError: pass  # no .hgtags in the store
        try:
            return self.tags[key]
        except KeyError:
            return self.changelog.lookup(key)

    def join(self, f):
        return os.path.join(self.path, f)

    def file(self, f):
        """Return the filelog for tracked file *f*."""
        return filelog(self.opener, f)

    def transaction(self):
        return transaction(self.opener, self.join("journal"),
                           self.join("undo"))

    def commit(self, parent, update=None, text=""):
        """Commit working-directory changes against *parent*.

        *update* is the list of changed files (computed via diffdir when
        None); files queued in "to-remove" are dropped from the manifest.
        """
        tr = self.transaction()

        try:
            remove = [ l[:-1] for l in self.opener("to-remove") ]
            os.unlink(self.join("to-remove"))
        except IOError:
            remove = []

        if update is None:  # fix: identity comparison with None
            update = self.diffdir(self.root, parent)[0]

        # check in files
        new = {}
        linkrev = self.changelog.count()
        for f in update:
            self.ui.note(f + "\n")
            try:
                t = open(f).read()
            except IOError:
                # file vanished since the status scan: treat as removed
                remove.append(f)
                continue
            r = self.file(f)
            new[f] = r.add(t, tr, linkrev)

        # update manifest
        mmap = self.manifest.read(self.manifest.tip())
        mmap.update(new)
        for f in remove:
            del mmap[f]
        mnode = self.manifest.add(mmap, tr, linkrev)

        # add changeset
        new = new.keys()
        new.sort()

        edittext = text + "\n" + "".join(["HG: changed %s\n" % f for f in new])
        edittext += "".join(["HG: removed %s\n" % f for f in remove])
        edittext = self.ui.edit(edittext)

        n = self.changelog.add(mnode, new, edittext, tr)
        tr.close()

        self.setcurrent(n)
        self.dircache.update(new)
        self.dircache.remove(remove)

    def checkdir(self, path):
        """Create any missing parent directories of *path*."""
        d = os.path.dirname(path)
        if not d: return
        if not os.path.isdir(d):
            self.checkdir(d)
            os.mkdir(d)

    def checkout(self, node):
        # checkout is really dumb at the moment
        # it ought to basically merge
        change = self.changelog.read(node)
        mmap = self.manifest.read(change[0])

        l = mmap.keys()
        l.sort()
        stats = []
        for f in l:
            self.ui.note(f + "\n")
            r = self.file(f)
            t = r.revision(mmap[f])
            try:
                open(f, "w").write(t)
            except IOError:
                # parent directory missing: create it and retry
                # (fix: was a bare except, which also hid real errors)
                self.checkdir(f)
                open(f, "w").write(t)

        self.setcurrent(node)
        self.dircache.clear()
        self.dircache.update(l)

    def diffdir(self, path, changeset):
        """Compare the working directory against *changeset*; return
        (changed, added, deleted) file lists."""
        changed = []
        mf = {}
        added = []

        if changeset:
            change = self.changelog.read(changeset)
            mf = self.manifest.read(change[0])

        if changeset == self.current:
            dc = self.dircache.copy()
        else:
            # no usable stat cache: force a content compare for everything
            dc = dict.fromkeys(mf)

        def fcmp(fn):
            t1 = open(os.path.join(self.root, fn)).read()
            t2 = self.file(fn).revision(mf[fn])
            return cmp(t1, t2)

        for dir, subdirs, files in os.walk(self.root):
            d = dir[len(self.root)+1:]
            if ".hg" in subdirs: subdirs.remove(".hg")

            for f in files:
                fn = os.path.join(d, f)
                try: s = os.stat(os.path.join(self.root, fn))
                except OSError: continue  # fix: was a bare except
                if fn in dc:
                    c = dc[fn]
                    del dc[fn]
                    if not c:
                        if fcmp(fn):
                            changed.append(fn)
                    elif c[1] != s.st_size:
                        changed.append(fn)
                    elif c[0] != s.st_mode or c[2] != s.st_mtime:
                        # stat data differs but size matches: compare content
                        if fcmp(fn):
                            changed.append(fn)
                else:
                    if self.ignore(fn): continue
                    added.append(fn)

        # whatever is left in dc was not seen on disk
        deleted = dc.keys()
        deleted.sort()

        return (changed, added, deleted)

    def diffrevs(self, node1, node2):
        """Compare the manifests of two changesets; return
        (changed, added, deleted) file lists."""
        changed, added = [], []

        change = self.changelog.read(node1)
        mf1 = self.manifest.read(change[0])
        change = self.changelog.read(node2)
        mf2 = self.manifest.read(change[0])

        for fn in mf2:
            if fn in mf1:  # idiom: has_key() -> in
                if mf1[fn] != mf2[fn]:
                    changed.append(fn)
                del mf1[fn]
            else:
                added.append(fn)

        deleted = mf1.keys()
        deleted.sort()

        return (changed, added, deleted)

    def add(self, list):
        """Schedule *list* of files for addition at the next commit."""
        self.dircache.taint(list)

    def remove(self, list):
        """Queue *list* of files for removal at the next commit."""
        dl = self.opener("to-remove", "a")
        for f in list:
            dl.write(f + "\n")
        dl.close()  # fix: flush and release the handle promptly

    def branches(self, nodes):
        """For each head in *nodes*, walk back to the branch root (a merge
        or a root changeset); return (head, root, p1, p2) tuples."""
        if not nodes: nodes = [self.changelog.tip()]
        b = []
        for n in nodes:
            t = n
            while n:
                p = self.changelog.parents(n)
                if p[1] != nullid or p[0] == nullid:
                    b.append((t, n, p[0], p[1]))
                    break
                n = p[0]
        return b

    def between(self, pairs):
        """For each (top, bottom) pair, return exponentially spaced sample
        nodes along the first-parent chain between them."""
        r = []

        for top, bottom in pairs:
            n, l, i = top, [], 0
            f = 1

            while n != bottom:
                p = self.changelog.parents(n)[0]
                if i == f:
                    l.append(n)
                    f = f * 2
                n = p
                i += 1

            r.append(l)

        return r

    def newer(self, nodes):
        """Return every changeset descended from (or in) *nodes*."""
        m = {}
        nl = []
        pm = {}
        cl = self.changelog
        t = l = cl.count()

        # find the lowest numbered node
        for n in nodes:
            l = min(l, cl.rev(n))
            m[n] = 1

        for i in xrange(l, t):
            n = cl.node(i)
            if n in m:  # explicitly listed
                pm[n] = 1
                nl.append(n)
                continue
            for p in cl.parents(n):
                if p in pm:  # parent listed
                    pm[n] = 1
                    nl.append(n)
                    break

        return nl

    def getchangegroup(self, remote):
        """Discover which changesets *remote* has that we lack and return
        the corresponding changegroup stream (or None if up to date)."""
        tip = remote.branches([])[0]
        self.ui.debug("remote tip branch is %s:%s\n" %
                      (short(tip[0]), short(tip[1])))
        m = self.changelog.nodemap
        unknown = [tip]
        search = []
        fetch = []

        if tip[0] in m:
            self.ui.note("nothing to do!\n")
            return None

        # breadth-first walk of remote branches until we hit known nodes
        while unknown:
            n = unknown.pop(0)
            if n == nullid: break
            if n[1] and n[1] in m:  # do we know the base?
                self.ui.debug("found incomplete branch %s\n" % short(n[1]))
                search.append(n)  # schedule branch range for scanning
            else:
                if n[2] in m and n[3] in m:
                    if n[1] not in fetch:
                        self.ui.debug("found new changeset %s\n" %
                                      short(n[1]))
                        fetch.append(n[1])  # earliest unknown
                    continue
                for b in remote.branches([n[2], n[3]]):
                    if b[0] not in m:
                        unknown.append(b)

        # binary-search partially known branches for the known/unknown edge
        while search:
            n = search.pop(0)
            l = remote.between([(n[0], n[1])])[0]
            p = n[0]
            f = 1
            for i in l + [n[1]]:
                if i in m:
                    if f <= 2:
                        self.ui.debug("found new branch changeset %s\n" %
                                      short(p))
                        fetch.append(p)
                    else:
                        self.ui.debug("narrowed branch search to %s:%s\n"
                                      % (short(p), short(i)))
                        search.append((p, i))
                    break
                p, f = i, f * 2

        for f in fetch:
            if f in m:
                # fix: was a string exception ("raise str, arg")
                raise Exception("already have %s" % short(f[:4]))

        self.ui.note("adding new changesets starting at " +
                     " ".join([short(f) for f in fetch]) + "\n")

        return remote.changegroup(fetch)

    def changegroup(self, basenodes):
        """Generate a changegroup stream of everything newer than
        *basenodes*: changelog group, manifest group, then per-file
        groups prefixed with a length-framed filename."""
        nodes = self.newer(basenodes)

        # construct the link map
        linkmap = {}
        for n in nodes:
            linkmap[self.changelog.rev(n)] = n

        # construct a list of all changed files
        changed = {}
        for n in nodes:
            c = self.changelog.read(n)
            for f in c[3]:
                changed[f] = 1
        changed = changed.keys()
        changed.sort()

        # the changegroup is changesets + manifests + all file revs
        revs = [ self.changelog.rev(n) for n in nodes ]

        yield self.changelog.group(linkmap)
        yield self.manifest.group(linkmap)

        for f in changed:
            g = self.file(f).group(linkmap)
            if not g:
                raise Exception("couldn't find change to %s" % f)
            l = struct.pack(">l", len(f))
            yield "".join([l, f, g])

    def addchangegroup(self, generator):
        """Apply a changegroup stream produced by changegroup(), merging
        branches (via merge3 and a resolve changeset) when needed."""
        class genread:
            # adapt a chunk generator to a read(n) interface
            def __init__(self, generator):
                self.g = generator
                self.buf = ""
            def read(self, l):
                while l > len(self.buf):
                    try:
                        self.buf += self.g.next()
                    except StopIteration:
                        break
                d, self.buf = self.buf[:l], self.buf[l:]
                return d

        if not generator: return
        source = genread(generator)

        def getchunk(add=0):
            # chunks are length-prefixed with a 4-byte big-endian int
            d = source.read(4)
            if not d: return ""
            l = struct.unpack(">l", d)[0]
            return source.read(l - 4 + add)

        tr = self.transaction()
        simple = True

        self.ui.status("adding changesets\n")
        # pull off the changeset group
        def report(x):
            self.ui.debug("add changeset %s\n" % short(x))
            return self.changelog.count()

        csg = getchunk()
        co = self.changelog.tip()
        cn = self.changelog.addgroup(csg, report, tr)

        self.ui.status("adding manifests\n")
        # pull off the manifest group
        mfg = getchunk()
        mm = self.manifest.tip()
        mo = self.manifest.addgroup(mfg, lambda x: self.changelog.rev(x), tr)

        # do we need a resolve?
        if self.changelog.ancestor(co, cn) != co:
            simple = False
            resolverev = self.changelog.count()

        # process the files
        self.ui.status("adding files\n")
        new = {}
        while 1:
            f = getchunk(4)
            if not f: break
            fg = getchunk()
            self.ui.debug("adding %s revisions\n" % f)
            fl = self.file(f)
            o = fl.tip()
            n = fl.addgroup(fg, lambda x: self.changelog.rev(x), tr)
            if not simple:
                if o == n: continue
                # this file has changed between branches, so it must be
                # represented in the merge changeset
                new[f] = self.merge3(fl, f, o, n, tr, resolverev)

        # For simple merges, we don't need to resolve manifests or changesets
        if simple:
            self.ui.debug("simple merge, skipping resolve\n")
            tr.close()
            return

        # resolve the manifest to point to all the merged files
        self.ui.status("resolving manifests\n")
        ma = self.manifest.ancestor(mm, mo)
        omap = self.manifest.read(mo)  # other
        amap = self.manifest.read(ma)  # ancestor
        mmap = self.manifest.read(mm)  # mine
        self.ui.debug("ancestor %s local %s other %s\n" %
                      (short(ma), short(mm), short(mo)))
        nmap = {}

        for f, mid in mmap.iteritems():
            if f in omap:
                if mid != omap[f]:
                    self.ui.debug("%s versions differ\n" % f)
                    if f in new: self.ui.debug("%s updated in resolve\n" % f)
                    # use merged version or local version
                    nmap[f] = new.get(f, mid)
                else:
                    nmap[f] = mid  # keep ours
                del omap[f]
            elif f in amap:
                if mid != amap[f]:
                    self.ui.debug("local changed %s which other deleted\n" % f)
                    pass  # we should prompt here
                else:
                    self.ui.debug("other deleted %s\n" % f)
                    pass  # other deleted it
            else:
                self.ui.debug("local created %s\n" % f)
                nmap[f] = mid  # we created it

        del mmap

        for f, oid in omap.iteritems():
            if f in amap:
                if oid != amap[f]:
                    self.ui.debug("other changed %s which we deleted\n" % f)
                    pass  # this is the nasty case, we should prompt
                else:
                    pass  # probably safe
            else:
                self.ui.debug("remote created %s\n" % f)
                nmap[f] = new.get(f, oid)  # remote created it

        del omap
        del amap

        node = self.manifest.add(nmap, tr, resolverev, mm, mo)

        # Now all files and manifests are merged, we add the changed files
        # and manifest id to the changelog
        self.ui.status("committing merge changeset\n")
        new = new.keys()
        new.sort()
        if co == cn: cn = -1

        edittext = "\nHG: merge resolve\n" + \
                   "".join(["HG: changed %s\n" % f for f in new])
        edittext = self.ui.edit(edittext)
        n = self.changelog.add(node, new, edittext, tr, co, cn)

        tr.close()

    def merge3(self, fl, fn, my, other, transaction, link):
        """perform a 3-way merge and append the result"""

        def temp(prefix, node):
            # dump a revision into a uniquely named temp file
            pre = "%s~%s." % (os.path.basename(fn), prefix)
            (fd, name) = tempfile.mkstemp("", pre)
            f = os.fdopen(fd, "w")
            f.write(fl.revision(node))
            f.close()
            return name

        base = fl.ancestor(my, other)
        self.ui.note("resolving %s\n" % fn)
        self.ui.debug("local %s remote %s ancestor %s\n" %
                      (short(my), short(other), short(base)))

        if my == base:
            # fast-forward: only the other side changed
            text = fl.revision(other)
        else:
            a = temp("local", my)
            b = temp("remote", other)
            c = temp("parent", base)

            # NOTE(review): raises KeyError if HGMERGE is unset — confirm
            cmd = os.environ["HGMERGE"]
            self.ui.debug("invoking merge with %s\n" % cmd)
            r = os.system("%s %s %s %s" % (cmd, a, b, c))
            if r:
                # fix: was a string exception
                raise Exception("Merge failed!")

            text = open(a).read()
            os.unlink(a)
            os.unlink(b)
            os.unlink(c)

        return fl.addrevision(text, transaction, link, my, other)
766 770
class remoterepository:
    """Client for a repository served over HTTP (hg:// URLs).

    Each operation is a single CGI-style request; responses are parsed
    from plain text, and changegroups arrive zlib-compressed.
    """

    def __init__(self, ui, path):
        self.url = path.replace("hg://", "http://", 1)
        self.ui = ui

    def do_cmd(self, cmd, **args):
        """Issue *cmd* with keyword arguments as query parameters."""
        self.ui.debug("sending %s command\n" % cmd)
        params = {"cmd": cmd}
        params.update(args)
        request = "%s?%s" % (self.url, urllib.urlencode(params))
        return urllib.urlopen(request)

    def branches(self, nodes):
        """Remote equivalent of localrepository.branches()."""
        arg = " ".join(map(hex, nodes))
        body = self.do_cmd("branches", nodes=arg).read()
        return [ map(bin, line.split(" ")) for line in body.splitlines() ]

    def between(self, pairs):
        """Remote equivalent of localrepository.between()."""
        arg = "\n".join(["-".join(map(hex, p)) for p in pairs])
        body = self.do_cmd("between", pairs=arg).read()
        return [ map(bin, line.split(" ")) for line in body.splitlines() ]

    def changegroup(self, nodes):
        """Yield the decompressed changegroup rooted at *nodes*."""
        arg = " ".join(map(hex, nodes))
        zd = zlib.decompressobj()
        f = self.do_cmd("changegroup", roots=arg)
        while True:
            chunk = f.read(4096)
            if not chunk:
                yield zd.flush()
                break
            yield zd.decompress(chunk)
802 806
def repository(ui, path=None, create=0):
    """Factory: return the repository class appropriate for *path*."""
    if path and path[:5] == "hg://":
        return remoterepository(ui, path)
    return localrepository(ui, path, create)
808 812
class ui:
    """Console interaction: leveled output, prompting, and spawning an
    editor for commit messages."""

    def __init__(self, verbose=False, debug=False, quiet=False):
        # debug implies verbose; verbose or debug overrides quiet
        self.quiet = quiet and not verbose and not debug
        self.verbose = verbose or debug
        self.debugflag = debug

    def write(self, *args):
        for a in args:
            sys.stdout.write(str(a))

    def prompt(self, msg, pat):
        """Prompt until the reply matches regex *pat*; return the reply."""
        while 1:
            sys.stdout.write(msg)
            r = sys.stdin.readline()[:-1]
            if re.match(pat, r):
                return r

    def status(self, *msg):
        if not self.quiet: self.write(*msg)

    def warn(self, *msg):
        # fix: previously took one string and splatted it into write()
        # character by character; accept varargs like status() does
        self.write(*msg)

    def note(self, *msg):
        if self.verbose: self.write(*msg)

    def debug(self, *msg):
        if self.debugflag: self.write(*msg)

    def edit(self, text):
        """Run $EDITOR (default vi) on *text*; strip HG: helper lines
        from the result and return it."""
        (fd, name) = tempfile.mkstemp("hg")
        f = os.fdopen(fd, "w")
        f.write(text)
        f.close()

        editor = os.environ.get("EDITOR", "vi")
        r = os.system("%s %s" % (editor, name))
        if r:
            # fix: was a string exception
            raise Exception("Edit failed!")

        t = open(name).read()
        os.unlink(name)  # fix: remove the temp file instead of leaking it
        t = re.sub("(?m)^HG:.*\n", "", t)

        return t
846 850
847 851
class httprangereader:
    """File-like reader over HTTP using Range requests, so revlog data
    can be fetched piecemeal from a plain web server."""

    def __init__(self, url):
        self.url = url
        self.pos = 0

    def seek(self, pos):
        self.pos = pos

    def read(self, bytes=None):
        """Fetch *bytes* bytes from the current position (to EOF when
        None) via a single Range request."""
        handler = byterange.HTTPRangeHandler()
        urllib2.install_opener(urllib2.build_opener(handler))
        req = urllib2.Request(self.url)
        end = self.pos + bytes if bytes else ''
        req.add_header('Range', 'bytes=%d-%s' % (self.pos, end))
        return urllib2.urlopen(req).read()
@@ -1,457 +1,461 b''
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # This provides efficient delta storage with O(1) retrieve and append
4 4 # and O(changes) merge between branches
5 5 #
6 6 # Copyright 2005 Matt Mackall <mpm@selenic.com>
7 7 #
8 8 # This software may be used and distributed according to the terms
9 9 # of the GNU General Public License, incorporated herein by reference.
10 10
11 11 import zlib, struct, sha, os, tempfile, binascii
12 12 from mercurial import mdiff
13 13
def hex(node):
    """Binary node id -> ascii hex string."""
    return binascii.hexlify(node)

def bin(node):
    """Ascii hex string -> binary node id."""
    return binascii.unhexlify(node)

def short(node):
    """Abbreviated (8 hex digit) form of a node id."""
    return hex(node[:4])

def compress(text):
    """Compress revision text for storage."""
    return zlib.compress(text)

def decompress(bin):
    """Inverse of compress()."""
    return zlib.decompress(bin)

def hash(text, p1, p2):
    """Node id of a revision: SHA-1 of the sorted parents and the text."""
    if p2 < p1:
        p1, p2 = p2, p1
    return sha.sha(p1 + p2 + text).digest()

# the id of revlog's null revision (no parents, empty text)
nullid = "\0" * 20
# index entry: offset, size, base, linkrev (4l), p1, p2, nodeid (3x20s)
indexformat = ">4l20s20s20s"
31 31
class lazyparser:
    """Parse the revlog index lazily.

    Holds the raw index bytes and decodes entries in 1000-entry blocks
    on demand via load().  Exposes:
      index -- list of entries (None until their block is loaded)
      map   -- nodeid -> rev (nullid maps to -1)
    """
    def __init__(self, data):
        self.data = data
        self.s = struct.calcsize(indexformat)
        # `//`: explicit floor division (py2's `/` on ints was already floor);
        # also removed an unreachable `if 0:` eager-parse block
        self.l = len(data) // self.s
        self.index = [None] * self.l
        self.map = {nullid: -1}

    def load(self, pos):
        """Decode the 1000-entry block containing index position *pos*."""
        block = pos // 1000
        i = block * 1000
        end = min(self.l, i + 1000)
        while i < end:
            d = self.data[i * self.s: (i + 1) * self.s]
            # entry: offset, size, base, linkrev, p1, p2, nodeid
            e = struct.unpack(indexformat, d)
            self.index[i] = e
            self.map[e[6]] = i  # e[6] is the nodeid
            i += 1
61 61
class lazyindex:
    """List-like view over a lazyparser's index, faulting entries in
    on first access."""
    def __init__(self, parser):
        self.p = parser

    def __len__(self):
        return len(self.p.index)

    def __getitem__(self, pos):
        entry = self.p.index[pos]
        if entry:
            return entry
        # entry not decoded yet -- load its block, then reread
        self.p.load(pos)
        return self.p.index[pos]

    def append(self, e):
        self.p.index.append(e)
75 75
class lazymap:
    """Dict-like nodeid -> rev mapping backed by a lazyparser.

    Misses fall back to scanning the raw index data for the node and
    loading the block that contains it.
    """
    def __init__(self, parser):
        self.p = parser

    def load(self, key):
        """Locate *key* in the raw index bytes and decode its block."""
        n = self.p.data.find(key)
        if n < 0: raise KeyError("node " + hex(key))
        pos = n // self.p.s  # `//`: explicit floor (py2 int `/` was floor)
        self.p.load(pos)

    def __contains__(self, key):
        try:
            self[key]
            return True
        except KeyError:
            return False

    def __iter__(self):
        for i in xrange(self.p.l):
            try:
                yield self.p.index[i][6]
            except TypeError:
                # bug fix: was a bare `except:`; the only expected failure
                # is subscripting a not-yet-loaded None entry
                self.p.load(i)
                yield self.p.index[i][6]

    def __getitem__(self, key):
        try:
            return self.p.map[key]
        except KeyError:
            try:
                self.load(key)
                return self.p.map[key]
            except KeyError:
                raise KeyError("node " + hex(key))

    def __setitem__(self, key, val):
        self.p.map[key] = val
108 108
class revlog:
    """Delta-chained, append-only storage for one item's revision history.

    Each index entry is (offset, size, base, linkrev, p1, p2, nodeid):
    the revision's span in the data file, the rev of its chain's full
    version, the changeset rev it belongs to, its parents and its id.
    """
    def __init__(self, opener, indexfile, datafile):
        self.indexfile = indexfile
        self.datafile = datafile
        self.opener = opener
        self.cache = None  # (node, rev, text) of the last revision read
        # read the whole index for now, handle on-demand later
        try:
            i = self.opener(self.indexfile).read()
        except IOError:
            i = ""
        parser = lazyparser(i)
        self.index = lazyindex(parser)
        self.nodemap = lazymap(parser)

    def tip(self): return self.node(len(self.index) - 1)
    def count(self): return len(self.index)
    def node(self, rev): return (rev < 0) and nullid or self.index[rev][6]
    def rev(self, node): return self.nodemap[node]
    def linkrev(self, node): return self.index[self.nodemap[node]][3]
    def parents(self, node):
        if node == nullid: return (nullid, nullid)
        return self.index[self.nodemap[node]][4:6]

    def start(self, rev): return self.index[rev][0]
    def length(self, rev): return self.index[rev][1]
    def end(self, rev): return self.start(rev) + self.length(rev)
    def base(self, rev): return self.index[rev][2]

    def lookup(self, id):
        """Resolve *id* -- an integer rev or an unambiguous hex fragment --
        to a node.  Raises KeyError on no match or several matches."""
        try:
            rev = int(id)
            return self.node(rev)
        except ValueError:
            c = []
            for n in self.nodemap:
                if id in hex(n):
                    c.append(n)
            if len(c) > 1: raise KeyError("Ambiguous identifier")
            if len(c) < 1: raise KeyError("No match found")
            return c[0]
        # (dead `return None` removed: every path above returns or raises)

    def diff(self, a, b):
        """Delta from text a to text b."""
        return mdiff.textdiff(a, b)

    def patches(self, t, pl):
        """Apply the list of patches pl to text t."""
        return mdiff.patches(t, pl)

    def revision(self, node):
        """Return the full text of *node*, reconstructing it by patching
        forward from the nearest full version (or the cached text)."""
        if node == nullid: return ""
        if self.cache and self.cache[0] == node: return self.cache[2]

        text = None
        rev = self.rev(node)
        base = self.base(rev)
        start = self.start(base)
        end = self.end(rev)

        # reuse the cached text as the patch base when it lies on our chain
        if self.cache and self.cache[1] >= base and self.cache[1] < rev:
            base = self.cache[1]
            start = self.start(base + 1)
            text = self.cache[2]
            last = 0

        f = self.opener(self.datafile)
        f.seek(start)
        data = f.read(end - start)

        # bug fix: test `is None`, not truthiness -- a cached empty text is
        # a valid base, and the old `if not text:` then sliced data wrongly
        if text is None:
            last = self.length(base)
            text = decompress(data[:last])

        bins = []
        for r in xrange(base + 1, rev + 1):
            s = self.length(r)
            bins.append(decompress(data[last:last + s]))
            last = last + s

        text = mdiff.patches(text, bins)

        (p1, p2) = self.parents(node)
        if node != hash(text, p1, p2):
            raise IOError("integrity check failed on %s:%d"
                          % (self.datafile, rev))

        self.cache = (node, rev, text)
        return text

    def addrevision(self, text, transaction, link, p1=None, p2=None):
        """Append a revision.  Stores a delta against tip unless a full
        version would be comparable in size; returns the new node."""
        if text is None: text = ""
        if p1 is None: p1 = self.tip()
        if p2 is None: p2 = nullid

        node = hash(text, p1, p2)

        n = self.count()
        t = n - 1

        if n:
            base = self.base(t)
            start = self.start(base)
            end = self.end(t)
            prev = self.revision(self.tip())
            d = self.diff(prev, text)
            # paranoia: verify the delta really reproduces the text
            if self.patches(prev, [d]) != text:
                raise AssertionError("diff failed")
            data = compress(d)
            dist = end - start + len(data)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if not n or dist > len(text) * 2:
            data = compress(text)
            base = n
        else:
            base = self.base(t)

        offset = 0
        if t >= 0:
            offset = self.end(t)

        e = (offset, len(data), base, link, p1, p2, node)

        self.index.append(e)
        self.nodemap[node] = n
        entry = struct.pack(indexformat, *e)

        transaction.add(self.datafile, e[0])
        self.opener(self.datafile, "a").write(data)
        transaction.add(self.indexfile, n * len(entry))
        self.opener(self.indexfile, "a").write(entry)

        self.cache = (node, n, text)
        return node

    def ancestor(self, a, b):
        """Return the closest common ancestor node of a and b (nullid if
        the histories are unrelated)."""
        def expand(list, map):
            # breadth-first walk of ancestors, marking each in map;
            # yields nullid when exhausted
            a = []
            while list:
                n = list.pop(0)
                map[n] = 1
                yield n
                for p in self.parents(n):
                    if p != nullid and p not in map:
                        list.append(p)
            yield nullid

        amap = {}
        bmap = {}
        ag = expand([a], amap)
        bg = expand([b], bmap)
        adone = bdone = 0

        # advance both walks in lockstep until one visits a node the
        # other has already seen
        while not adone or not bdone:
            if not adone:
                an = ag.next()
                if an == nullid:
                    adone = 1
                elif an in bmap:
                    return an
            if not bdone:
                bn = bg.next()
                if bn == nullid:
                    bdone = 1
                elif bn in amap:
                    return bn

        return nullid

    def group(self, linkmap):
        """Serialize the revisions whose linkrev is in linkmap as a
        delta group (length-prefixed metadata + delta records)."""
        # given a list of changeset revs, return a set of deltas and
        # metadata corresponding to nodes. the first delta is
        # parent(nodes[0]) -> nodes[0] the receiver is guaranteed to
        # have this parent as it has all history before these
        # changesets. parent is parent[0]

        revs = []
        needed = {}

        # find file nodes/revs that match changeset revs
        for i in xrange(0, self.count()):
            if self.index[i][3] in linkmap:
                revs.append(i)
                needed[i] = 1

        # if we don't have any revisions touched by these changesets, bail
        if not revs: return struct.pack(">l", 0)

        # add the parent of the first rev
        p = self.parents(self.node(revs[0]))[0]
        revs.insert(0, self.rev(p))

        # for each delta that isn't contiguous in the log, we need to
        # reconstruct the base, reconstruct the result, and then
        # calculate the delta. We also need to do this where we've
        # stored a full version and not a delta
        for i in xrange(0, len(revs) - 1):
            a, b = revs[i], revs[i + 1]
            if a + 1 != b or self.base(b) == b:
                for j in xrange(self.base(a), a + 1):
                    needed[j] = 1
                for j in xrange(self.base(b), b + 1):
                    needed[j] = 1

        # calculate spans to retrieve from datafile
        needed = needed.keys()
        needed.sort()
        spans = []
        for n in needed:
            if n < 0: continue
            o = self.start(n)
            l = self.length(n)
            spans.append((o, l, [(n, l)]))

        # merge spans
        merge = [spans.pop(0)]
        while spans:
            e = spans.pop(0)
            f = merge[-1]
            if e[0] == f[0] + f[1]:
                merge[-1] = (f[0], f[1] + e[1], f[2] + e[2])
            else:
                merge.append(e)

        # read spans in, divide up chunks
        chunks = {}
        for span in merge:
            # we reopen the file for each span to make http happy for now
            f = self.opener(self.datafile)
            f.seek(span[0])
            data = f.read(span[1])

            # divide up the span
            pos = 0
            for r, l in span[2]:
                chunks[r] = data[pos: pos + l]
                pos += l

        # helper to reconstruct intermediate versions
        def construct(text, base, rev):
            bins = [decompress(chunks[r]) for r in xrange(base + 1, rev + 1)]
            return mdiff.patches(text, bins)

        # build deltas
        deltas = []
        for d in xrange(0, len(revs) - 1):
            a, b = revs[d], revs[d + 1]
            n = self.node(b)

            if a + 1 != b or self.base(b) == b:
                # non-contiguous (or b is a full version): rebuild both
                # texts and diff them
                if a >= 0:
                    base = self.base(a)
                    ta = decompress(chunks[self.base(a)])
                    ta = construct(ta, base, a)
                else:
                    ta = ""

                base = self.base(b)
                if a > base:
                    base = a
                    tb = ta
                else:
                    tb = decompress(chunks[self.base(b)])
                tb = construct(tb, base, b)
                d = self.diff(ta, tb)
            else:
                # contiguous delta: ship the stored chunk as-is
                d = decompress(chunks[b])

            p = self.parents(n)
            meta = n + p[0] + p[1] + linkmap[self.linkrev(n)]
            l = struct.pack(">l", len(meta) + len(d) + 4)
            deltas.append(l + meta + d)

        l = struct.pack(">l", sum(map(len, deltas)) + 4)
        deltas.insert(0, l)
        return "".join(deltas)

    def addgroup(self, data, linkmapper, transaction):
        """Apply a delta group (as produced by group()) to this log.

        The first delta is against its parent, which must already be in
        our log; the rest are each against the previous delta.  Returns
        the new tip node.
        """
        if not data: return self.tip()

        # retrieve the parent revision of the delta chain
        chain = data[24:44]
        if chain not in self.nodemap:
            # was a string raise, illegal in modern Python
            raise KeyError("unknown base %s" % short(chain[:4]))

        # track the base of the current delta log
        r = self.count()
        t = r - 1

        base = prev = -1
        start = end = 0
        if r:
            start = self.start(self.base(t))
            end = self.end(t)
            measure = self.length(self.base(t))
            base = self.base(t)
            prev = self.tip()

        transaction.add(self.datafile, end)
        transaction.add(self.indexfile, r * struct.calcsize(indexformat))
        dfh = self.opener(self.datafile, "a")
        ifh = self.opener(self.indexfile, "a")

        # loop through our set of deltas
        pos = 0
        while pos < len(data):
            l, node, p1, p2, cs = struct.unpack(">l20s20s20s20s",
                                                data[pos:pos+84])
            link = linkmapper(cs)
            if node in self.nodemap:
                raise KeyError("already have %s" % hex(node[:4]))
            delta = data[pos + 84:pos + l]
            pos += l

            # full versions are inserted when the needed deltas become
            # comparable to the uncompressed text or when the previous
            # version is not the one we have a delta against. We use
            # the size of the previous full rev as a proxy for the
            # current size.

            if chain == prev:
                cdelta = compress(delta)

            # note: when chain != prev, cdelta is unset but the `or`
            # short-circuits before reading it
            if chain != prev or (end - start + len(cdelta)) > measure * 2:
                # flush our writes here so we can read it in revision
                dfh.flush()
                ifh.flush()
                text = self.revision(chain)
                text = self.patches(text, [delta])
                chk = self.addrevision(text, transaction, link, p1, p2)
                if chk != node:
                    raise AssertionError("consistency error adding group")
                measure = len(text)
            else:
                e = (end, len(cdelta), self.base(t), link, p1, p2, node)
                self.index.append(e)
                self.nodemap[node] = r
                dfh.write(cdelta)
                ifh.write(struct.pack(indexformat, *e))

            t, r, chain, prev = r, r + 1, node, node
            start = self.start(self.base(t))
            end = self.end(t)

        dfh.close()
        ifh.close()
        return node
General Comments 0
You need to be logged in to leave comments. Login now