upstream/mercurial-mirror Files · mercurial/pure/bdiff.py

perf: benchmark command for revlog indexes...

perf: benchmark command for revlog indexes

We didn't have explicit microbenchmark coverage for loading revlog indexes. That seems like a useful thing to have, so let's add it.

We currently measure the low-level nodemap APIs. There is room to hook in at the actual revlog layer. This could be done as a follow-up.

The hackiest thing about this patch is specifying revlog paths. Other commands have arguments that allow resolution of changelog, manifest, and filelog. I needed to hook in at a lower level of the revlog API than what the existing helper functions to resolve revlogs allowed. I was too lazy to write some new APIs. This could be done as a follow-up easily enough.

Example output for `hg perfrevlogindex 00changelog.i` on my Firefox repo (404418 revisions):

! revlog constructor
! wall 0.003106 comb 0.000000 user 0.000000 sys 0.000000 (best of 912)
! read
! wall 0.003077 comb 0.000000 user 0.000000 sys 0.000000 (best of 924)
! create index object
! wall 0.000000 comb 0.000000 user 0.000000 sys 0.000000 (best of 1803994)
! retrieve index entry for rev 0
! wall 0.000193 comb 0.000000 user 0.000000 sys 0.000000 (best of 14037)
! look up missing node
! wall 0.003313 comb 0.000000 user 0.000000 sys 0.000000 (best of 865)
! look up node at rev 0
! wall 0.003295 comb 0.010000 user 0.010000 sys 0.000000 (best of 858)
! look up node at 1/4 len
! wall 0.002598 comb 0.010000 user 0.010000 sys 0.000000 (best of 1103)
! look up node at 1/2 len
! wall 0.001909 comb 0.000000 user 0.000000 sys 0.000000 (best of 1507)
! look up node at 3/4 len
! wall 0.001213 comb 0.000000 user 0.000000 sys 0.000000 (best of 2275)
! look up node at tip
! wall 0.000453 comb 0.000000 user 0.000000 sys 0.000000 (best of 5697)
! look up all nodes (forward)
! wall 0.094615 comb 0.100000 user 0.100000 sys 0.000000 (best of 100)
! look up all nodes (reverse)
! wall 0.045889 comb 0.050000 user 0.050000 sys 0.000000 (best of 100)
! retrieve all index entries (forward)
! wall 0.078398 comb 0.080000 user 0.060000 sys 0.020000 (best of 100)
! retrieve all index entries (reverse)
! wall 0.079376 comb 0.080000 user 0.070000 sys 0.010000 (best of 100)

Yuya Nishihara - - Load All Authors

File last commit:

r32512:0e8b0b9a default


                r32532:e4f51462

default

Download file

             bdiff.py
        
                    91 lines
            
             | 2.3 KiB
            
                | text/x-python
            
             |
                PythonLexer
            
             / mercurial / pure / bdiff.py
          
                    History
                
                 |
                  Annotation
                 | Raw
                 |Copy content
                 |Copy permalink

      # bdiff.py - Python implementation of bdiff.c

      #

      # Copyright 2009 Matt Mackall <mpm@selenic.com> and others

      #

      # This software may be used and distributed according to the terms of the

      # GNU General Public License version 2 or any later version.

      from __future__ import absolute_import

      import difflib

      import re

      import struct

      def splitnewlines(text):
          '''Split text into lines at line feeds only, keeping the line feeds.

          Unlike str.splitlines, no other character (carriage return, form
          feed, ...) is treated as a line boundary.  Every returned line ends
          with a line feed except possibly the last one; empty text yields an
          empty list.
          '''
          pieces = text.split('\n')
          # Whatever follows the final line feed is an unterminated last line;
          # it is empty when the text ends with a line feed (or is empty).
          tail = pieces.pop()
          result = [piece + '\n' for piece in pieces]
          if tail:
              result.append(tail)
          return result

      def _normalizeblocks(a, b, blocks):

          prev = None

          r = []

          for curr in blocks:

              if prev is None:

                  prev = curr

                  continue

              shift = 0

              a1, b1, l1 = prev

              a1end = a1 + l1

              b1end = b1 + l1

              a2, b2, l2 = curr

              a2end = a2 + l2

              b2end = b2 + l2

              if a1end == a2:

                  while (a1end + shift < a2end and

                         a[a1end + shift] == b[b1end + shift]):

                      shift += 1

              elif b1end == b2:

                  while (b1end + shift < b2end and

                         a[a1end + shift] == b[b1end + shift]):

                      shift += 1

              r.append((a1, b1, l1 + shift))

              prev = a2 + shift, b2 + shift, l2 - shift

          r.append(prev)

          return r

      def bdiff(a, b):
          '''Return a binary delta that turns the bytes a into the bytes b.

          Both inputs are split into lines (line endings kept) and compared
          line by line.  The result is a concatenation of records, each made
          of a header of three big-endian signed 32-bit integers -- start
          offset in a, end offset in a, length of the replacement data --
          followed by the replacement data itself.  An empty result means no
          change.

          The joins use bytes literals so the function works on Python 3 as
          well as Python 2 (where b"" and "" are the same type).
          '''
          a = bytes(a).splitlines(True)
          b = bytes(b).splitlines(True)

          if not a:
              # Nothing to match against: all of b is one insertion at offset 0.
              s = b"".join(b)
              if not s:
                  return s
              return struct.pack(">lll", 0, 0, len(s)) + s

          # offsets[i] is the byte offset in the old text where line i starts;
          # the extra final entry is the total length.
          offsets = [0]
          for line in a:
              offsets.append(offsets[-1] + len(line))

          matches = difflib.SequenceMatcher(None, a, b).get_matching_blocks()
          matches = _normalizeblocks(a, b, matches)

          chunks = []
          la = 0  # first line of a not yet covered by a matching block
          lb = 0  # first line of b not yet covered by a matching block
          for am, bm, size in matches:
              s = b"".join(b[lb:bm])
              # Emit a record when lines of a are dropped or lines of b added.
              if am > la or s:
                  header = struct.pack(">lll", offsets[la], offsets[am], len(s))
                  chunks.append(header + s)
              la = am + size
              lb = bm + size

          return b"".join(chunks)

      def blocks(a, b):
          '''Return the matching line ranges of a and b.

          Both texts are split with splitnewlines, matched with
          difflib.SequenceMatcher and passed through _normalizeblocks.  Each
          entry of the result is (a-start, a-end, b-start, b-end) in line
          numbers, the end being exclusive.
          '''
          alines = splitnewlines(a)
          blines = splitnewlines(b)
          matcher = difflib.SequenceMatcher(None, alines, blines)
          matches = _normalizeblocks(alines, blines,
                                     matcher.get_matching_blocks())
          ranges = []
          for astart, bstart, length in matches:
              ranges.append((astart, astart + length, bstart, bstart + length))
          return ranges

      def fixws(text, allws):
          '''Normalize runs of spaces, tabs and carriage returns in text.

          When allws is true every such run is removed.  Otherwise each run is
          collapsed to a single space and a space directly before a line feed
          is dropped.
          '''
          replacement = '' if allws else ' '
          collapsed = re.sub('[ \t\r]+', replacement, text)
          if allws:
              return collapsed
          return collapsed.replace(' \n', '\n')

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

				# bdiff.py - Python implementation of bdiff.c
				#
				# Copyright 2009 Matt Mackall <mpm@selenic.com> and others
				#
				# This software may be used and distributed according to the terms of the
				# GNU General Public License version 2 or any later version.

				from __future__ import absolute_import

				import difflib
				import re
				import struct

				def splitnewlines(text):
				    '''Like str.splitlines, but only split on line feeds.

				    The line feeds are kept on the returned lines; a final line
				    without a line feed is returned unterminated, and empty text
				    yields an empty list.
				    '''
				    lines = [l + '\n' for l in text.split('\n')]
				    if lines:
				        if lines[-1] == '\n':
				            # The text ended with a line feed (or was empty): the
				            # split produced one spurious empty trailing piece.
				            lines.pop()
				        else:
				            # The last line was not terminated: undo the added feed.
				            lines[-1] = lines[-1][:-1]
				    return lines

				def _normalizeblocks(a, b, blocks):
				prev = None
				r = []
				for curr in blocks:
				if prev is None:
				prev = curr
				continue
				shift = 0

				a1, b1, l1 = prev
				a1end = a1 + l1
				b1end = b1 + l1

				a2, b2, l2 = curr
				a2end = a2 + l2
				b2end = b2 + l2
				if a1end == a2:
				while (a1end + shift < a2end and
				a[a1end + shift] == b[b1end + shift]):
				shift += 1
				elif b1end == b2:
				while (b1end + shift < b2end and
				a[a1end + shift] == b[b1end + shift]):
				shift += 1
				r.append((a1, b1, l1 + shift))
				prev = a2 + shift, b2 + shift, l2 - shift
				r.append(prev)
				return r

				def bdiff(a, b):
				    '''Return a binary delta that turns the bytes a into the bytes b.

				    The inputs are compared line by line (line endings kept).  The
				    result concatenates records made of three big-endian signed
				    32-bit integers -- start offset in a, end offset in a, length of
				    the replacement data -- followed by the replacement data.  An
				    empty result means no change.

				    Bytes literals are used for the joins so this also runs on
				    Python 3; on Python 2 they are identical to plain literals.
				    '''
				    a = bytes(a).splitlines(True)
				    b = bytes(b).splitlines(True)

				    if not a:
				        # Nothing to match against: all of b is inserted at offset 0.
				        s = b"".join(b)
				        return s and (struct.pack(">lll", 0, 0, len(s)) + s)

				    records = []
				    # p[i] is the byte offset in the old text where line i starts.
				    p = [0]
				    for i in a:
				        p.append(p[-1] + len(i))

				    d = difflib.SequenceMatcher(None, a, b).get_matching_blocks()
				    d = _normalizeblocks(a, b, d)
				    la = 0
				    lb = 0
				    for am, bm, size in d:
				        s = b"".join(b[lb:bm])
				        # Emit a record when lines of a are dropped or lines of b added.
				        if am > la or s:
				            records.append(struct.pack(">lll", p[la], p[am], len(s)) + s)
				        la = am + size
				        lb = bm + size

				    return b"".join(records)

				def blocks(a, b):
				    '''Return the matching line ranges of a and b.

				    Each entry is (a-start, a-end, b-start, b-end) in line numbers
				    with exclusive ends, computed from difflib's matching blocks
				    after they have gone through _normalizeblocks.
				    '''
				    an = splitnewlines(a)
				    bn = splitnewlines(b)
				    d = difflib.SequenceMatcher(None, an, bn).get_matching_blocks()
				    d = _normalizeblocks(an, bn, d)
				    return [(i, i + n, j, j + n) for (i, j, n) in d]

				def fixws(text, allws):
				    '''Normalize runs of spaces, tabs and carriage returns in text.

				    With allws true the runs are removed entirely; otherwise each
				    run becomes one space and a space directly before a line feed is
				    dropped.
				    '''
				    if allws:
				        text = re.sub('[ \t\r]+', '', text)
				    else:
				        text = re.sub('[ \t\r]+', ' ', text)
				        text = text.replace(' \n', '\n')
				    return text