##// END OF EJS Templates
revset: use phasecache.getrevset...
revset: use phasecache.getrevset This is part of a refactoring that moves some phase query optimization from revset.py to phases.py. See the previous patch for motivation. This patch changes revset code to use phasecache.getrevset so it no longer accesses the private field: _phasecache._phasesets directly. For performance impact, this patch was tested using the following query, on my hg-committed repo: for i in 'public()' 'not public()' 'draft()' 'not draft()'; do echo $i; hg perfrevset "$i"; hg perfrevset "$i" --hidden; done For the CPython implementation, most operations are unchanged (within +/- 1%), while "not public()" and "draft()" is noticeably faster on an unfiltered repo. It may be because the new code avoids a set copy if filteredrevs is empty. revset | public() | not public() | draft() | not draft() hidden | yes | no | yes | no | yes | no | yes | no ------------------------------------------------------------------ before | 19006 | 17352 | 239 | 286 | 180 | 228 | 7690 | 5745 after | 19137 | 17231 | 240 | 207 | 182 | 150 | 7687 | 5658 delta | | -38% | | -52% | (timed in microseconds) For the pure Python implementation, some operations are faster while "not draft()" is noticeably slower: revset | public() | not public() | draft() | not draft() hidden | yes | no | yes | no | yes | no | yes | no ------------------------------------------------------------------------ before | 18852 | 17183 | 17758 | 15921 | 17505 | 15973 | 41521 | 39822 after | 18924 | 17380 | 17558 | 14545 | 16727 | 13593 | 48356 | 43992 delta | | -9% | -5% | -15% | +16% | +10% That may be the different performance characters of generatorset vs. filteredset. The "not draft()" query could be optimized in this case where both "public" and "secret" are passed to "getrevsets" so it won't iterate the whole repo twice.

File last commit:

r30042:d24e03da default
r31017:17b5cda5 default
Show More
bdiff.py
169 lines | 4.9 KiB | text/x-python | PythonLexer
# bdiff.py - Python implementation of bdiff.c
#
# Copyright 2009 Matt Mackall <mpm@selenic.com> and others
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.
from __future__ import absolute_import
import array
import difflib
import re
import struct
from . import policy
policynocffi = policy.policynocffi
modulepolicy = policy.policy
def splitnewlines(text):
'''like str.splitlines, but only split on newlines.'''
lines = [l + '\n' for l in text.split('\n')]
if lines:
if lines[-1] == '\n':
lines.pop()
else:
lines[-1] = lines[-1][:-1]
return lines
def _normalizeblocks(a, b, blocks):
prev = None
r = []
for curr in blocks:
if prev is None:
prev = curr
continue
shift = 0
a1, b1, l1 = prev
a1end = a1 + l1
b1end = b1 + l1
a2, b2, l2 = curr
a2end = a2 + l2
b2end = b2 + l2
if a1end == a2:
while (a1end + shift < a2end and
a[a1end + shift] == b[b1end + shift]):
shift += 1
elif b1end == b2:
while (b1end + shift < b2end and
a[a1end + shift] == b[b1end + shift]):
shift += 1
r.append((a1, b1, l1 + shift))
prev = a2 + shift, b2 + shift, l2 - shift
r.append(prev)
return r
def _tostring(c):
if type(c) is array.array:
# this copy overhead isn't ideal
return c.tostring()
return str(c)
def bdiff(a, b):
a = _tostring(a).splitlines(True)
b = _tostring(b).splitlines(True)
if not a:
s = "".join(b)
return s and (struct.pack(">lll", 0, 0, len(s)) + s)
bin = []
p = [0]
for i in a: p.append(p[-1] + len(i))
d = difflib.SequenceMatcher(None, a, b).get_matching_blocks()
d = _normalizeblocks(a, b, d)
la = 0
lb = 0
for am, bm, size in d:
s = "".join(b[lb:bm])
if am > la or s:
bin.append(struct.pack(">lll", p[la], p[am], len(s)) + s)
la = am + size
lb = bm + size
return "".join(bin)
def blocks(a, b):
an = splitnewlines(a)
bn = splitnewlines(b)
d = difflib.SequenceMatcher(None, an, bn).get_matching_blocks()
d = _normalizeblocks(an, bn, d)
return [(i, i + n, j, j + n) for (i, j, n) in d]
def fixws(text, allws):
if allws:
text = re.sub('[ \t\r]+', '', text)
else:
text = re.sub('[ \t\r]+', ' ', text)
text = text.replace(' \n', '\n')
return text
if modulepolicy not in policynocffi:
try:
from _bdiff_cffi import ffi, lib
except ImportError:
if modulepolicy == 'cffi': # strict cffi import
raise
else:
def blocks(sa, sb):
a = ffi.new("struct bdiff_line**")
b = ffi.new("struct bdiff_line**")
ac = ffi.new("char[]", str(sa))
bc = ffi.new("char[]", str(sb))
l = ffi.new("struct bdiff_hunk*")
try:
an = lib.bdiff_splitlines(ac, len(sa), a)
bn = lib.bdiff_splitlines(bc, len(sb), b)
if not a[0] or not b[0]:
raise MemoryError
count = lib.bdiff_diff(a[0], an, b[0], bn, l)
if count < 0:
raise MemoryError
rl = [None] * count
h = l.next
i = 0
while h:
rl[i] = (h.a1, h.a2, h.b1, h.b2)
h = h.next
i += 1
finally:
lib.free(a[0])
lib.free(b[0])
lib.bdiff_freehunks(l.next)
return rl
def bdiff(sa, sb):
a = ffi.new("struct bdiff_line**")
b = ffi.new("struct bdiff_line**")
ac = ffi.new("char[]", str(sa))
bc = ffi.new("char[]", str(sb))
l = ffi.new("struct bdiff_hunk*")
try:
an = lib.bdiff_splitlines(ac, len(sa), a)
bn = lib.bdiff_splitlines(bc, len(sb), b)
if not a[0] or not b[0]:
raise MemoryError
count = lib.bdiff_diff(a[0], an, b[0], bn, l)
if count < 0:
raise MemoryError
rl = []
h = l.next
la = lb = 0
while h:
if h.a1 != la or h.b1 != lb:
lgt = (b[0] + h.b1).l - (b[0] + lb).l
rl.append(struct.pack(">lll", (a[0] + la).l - a[0].l,
(a[0] + h.a1).l - a[0].l, lgt))
rl.append(str(ffi.buffer((b[0] + lb).l, lgt)))
la = h.a2
lb = h.b2
h = h.next
finally:
lib.free(a[0])
lib.free(b[0])
lib.bdiff_freehunks(l.next)
return "".join(rl)