upstream/mercurial-mirror Commit - r71:47c9a869

Add mdiff.patches to speed up applying thousands of patches to the manifest

mpm@selenic.com -

r71:47c9a869 default

parent child

mercurial/mdiff.py

0 +71 -2

		@@ -1,6 +1,7 b''
1	1	#!/usr/bin/python
2		import difflib, struct
3		from cStringIO import StringIO
	2	import difflib, struct, mmap
	3
	4	devzero = file("/dev/zero")
4	5
5	6	def unidiff(a, ad, b, bd, fn):
6	7	if not a and not b: return ""
		@@ -52,6 +53,74 b' def diff(a, b, sorted=0):'
52	53
53	54	return "".join(bin)
54	55
	56	# This attempts to apply a series of patches in time proportional to
	57	# the total size of the patches, rather than patches * len(text). This
	58	# means rather than shuffling strings around, we shuffle around
	59	# pointers to fragments with fragment lists.
	60	#
	61	# When the fragment lists get too long, we collapse them. To do this
	62	# efficiently, we do all our operations inside a buffer created by
	63	# mmap and simply use memmove. This avoids creating a bunch of large
	64	# temporary string buffers.
	65
	66	def patches(a, bins):
	67	if not bins: return a
	68
	69	plens = [len(x) for x in bins]
	70	pl = sum(plens)
	71	bl = len(a) + pl
	72	tl = bl + bl + pl # enough for the patches and two working texts
	73	b1, b2 = 0, bl
	74	m = mmap.mmap(devzero.fileno(), tl, mmap.MAP_PRIVATE)
	75
	76	# load our original text
	77	m.write(a)
	78	frags = [(len(a), b1)]
	79
	80	# copy all the patches into our segment so we can memmove from them
	81	pos = b2 + bl
	82	m.seek(pos)
	83	for p in bins: m.write(p)
	84
	85	def pull(dst, src, l): # pull l bytes from src
	86	while l:
	87	f = src.pop(0)
	88	if f[0] > l: # do we need to split?
	89	src.insert(0, (f[0] - l, f[1] + l))
	90	dst.append((l, f[1]))
	91	return
	92	dst.append(f)
	93	l -= f[0]
	94
	95	def collect(buf, list):
	96	start = buf
	97	for l, p in list:
	98	m.move(buf, p, l)
	99	buf += l
	100	return (buf - start, start)
	101
	102	for plen in plens:
	103	# if our list gets too long, execute it
	104	if len(frags) > 128:
	105	b2, b1 = b1, b2
	106	frags = [collect(b1, frags)]
	107
	108	new = []
	109	end = pos + plen
	110	last = 0
	111	while pos < end:
	112	p1, p2, l = struct.unpack(">lll", m[pos:pos + 12])
	113	pull(new, frags, p1 - last) # what didn't change
	114	pull([], frags, p2 - p1) # what got deleted
	115	new.append((l, pos + 12)) # what got added
	116	pos += l + 12
	117	last = p2
	118	frags = new + frags # what was left at the end
	119
	120	t = collect(b2, frags)
	121
	122	return m[t[1]:t[1] + t[0]]
	123
55	124	def patch(a, bin):
56	125	last = pos = 0
57	126	r = []

mercurial/revlog.py

0 +8 -11

                          n = 0
                          i = self.opener(self.indexfile).read()
                          s = struct.calcsize(indexformat)
-                         for f in range(0, len(i), s):
+                         for f in xrange(0, len(i), s):
                              # offset, size, base, linkrev, p1, p2, nodeid
                              e = struct.unpack(indexformat, i[f:f + s])
                              self.nodemap[e[6]] = n
                  def diff(self, a, b):
                      return mdiff.textdiff(a, b)
-                 def patch(self, text, patch):
-                     return mdiff.patch(text, patch)
                  def revision(self, node):
                      if node == nullid: return ""
                      if self.cache and self.cache[0] == node: return self.cache[2]
                          last = self.length(base)
                          text = decompress(data[:last])
+                     bins = []
                      for r in xrange(base + 1, rev + 1):
                          s = self.length(r)
-                         b = decompress(data[last:last + s])
-                         text = self.patch(text, b)
+                         bins.append(decompress(data[last:last + s]))
                          last = last + s
+                     text = mdiff.patches(text, bins)
                      (p1, p2) = self.parents(node)
                      if node != hash(text, p1, p2):
                          raise "integrity check failed on %s:%d" % (self.datafile, rev)
                      # helper to reconstruct intermediate versions
                      def construct(text, base, rev):
-                         for r in range(base + 1, rev + 1):
-                             b = decompress(chunks[r])
-                             text = self.patch(text, b)
-                         return text
+                         bins = [decompress(chunks[r]) for r in xrange(base + 1, rev + 1)]
+                         return mdiff.patches(text, bins)
                      # build deltas
                      deltas = []
-                     for d in range(0, len(revs) - 1):
+                     for d in xrange(0, len(revs) - 1):
                          a, b = revs[d], revs[d + 1]
                          n = self.node(b)

General Comments 0

Write
Preview

You need to be logged in to leave comments. Login now

No TODOs yet

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages