##// END OF EJS Templates
util: teach lrucachedict to enforce a max total cost...
Gregory Szorc -
r39604:842cd0bd default
parent child Browse files
Show More
@@ -1869,18 +1869,23 b' def perfloadmarkers(ui, repo):'
1869 fm.end()
1869 fm.end()
1870
1870
1871 @command(b'perflrucachedict', formatteropts +
1871 @command(b'perflrucachedict', formatteropts +
1872 [(b'', b'size', 4, b'size of cache'),
1872 [(b'', b'costlimit', 0, b'maximum total cost of items in cache'),
1873 (b'', b'mincost', 0, b'smallest cost of items in cache'),
1874 (b'', b'maxcost', 100, b'maximum cost of items in cache'),
1875 (b'', b'size', 4, b'size of cache'),
1873 (b'', b'gets', 10000, b'number of key lookups'),
1876 (b'', b'gets', 10000, b'number of key lookups'),
1874 (b'', b'sets', 10000, b'number of key sets'),
1877 (b'', b'sets', 10000, b'number of key sets'),
1875 (b'', b'mixed', 10000, b'number of mixed mode operations'),
1878 (b'', b'mixed', 10000, b'number of mixed mode operations'),
1876 (b'', b'mixedgetfreq', 50, b'frequency of get vs set ops in mixed mode')],
1879 (b'', b'mixedgetfreq', 50, b'frequency of get vs set ops in mixed mode')],
1877 norepo=True)
1880 norepo=True)
1878 def perflrucache(ui, size=4, gets=10000, sets=10000, mixed=10000,
1881 def perflrucache(ui, mincost=0, maxcost=100, costlimit=0, size=4,
1879 mixedgetfreq=50, **opts):
1882 gets=10000, sets=10000, mixed=10000, mixedgetfreq=50, **opts):
1880 def doinit():
1883 def doinit():
1881 for i in xrange(10000):
1884 for i in xrange(10000):
1882 util.lrucachedict(size)
1885 util.lrucachedict(size)
1883
1886
1887 costrange = list(range(mincost, maxcost + 1))
1888
1884 values = []
1889 values = []
1885 for i in xrange(size):
1890 for i in xrange(size):
1886 values.append(random.randint(0, sys.maxint))
1891 values.append(random.randint(0, sys.maxint))
@@ -1899,16 +1904,34 b' def perflrucache(ui, size=4, gets=10000,'
1899 value = d[key]
1904 value = d[key]
1900 value # silence pyflakes warning
1905 value # silence pyflakes warning
1901
1906
1907 def dogetscost():
1908 d = util.lrucachedict(size, maxcost=costlimit)
1909 for i, v in enumerate(values):
1910 d.insert(v, v, cost=costs[i])
1911 for key in getseq:
1912 try:
1913 value = d[key]
1914 value # silence pyflakes warning
1915 except KeyError:
1916 pass
1917
1902 # Set mode tests insertion speed with cache eviction.
1918 # Set mode tests insertion speed with cache eviction.
1903 setseq = []
1919 setseq = []
1920 costs = []
1904 for i in xrange(sets):
1921 for i in xrange(sets):
1905 setseq.append(random.randint(0, sys.maxint))
1922 setseq.append(random.randint(0, sys.maxint))
1923 costs.append(random.choice(costrange))
1906
1924
1907 def doinserts():
1925 def doinserts():
1908 d = util.lrucachedict(size)
1926 d = util.lrucachedict(size)
1909 for v in setseq:
1927 for v in setseq:
1910 d.insert(v, v)
1928 d.insert(v, v)
1911
1929
1930 def doinsertscost():
1931 d = util.lrucachedict(size, maxcost=costlimit)
1932 for i, v in enumerate(setseq):
1933 d.insert(v, v, cost=costs[i])
1934
1912 def dosets():
1935 def dosets():
1913 d = util.lrucachedict(size)
1936 d = util.lrucachedict(size)
1914 for v in setseq:
1937 for v in setseq:
@@ -1923,12 +1946,14 b' def perflrucache(ui, size=4, gets=10000,'
1923 else:
1946 else:
1924 op = 1
1947 op = 1
1925
1948
1926 mixedops.append((op, random.randint(0, size * 2)))
1949 mixedops.append((op,
1950 random.randint(0, size * 2),
1951 random.choice(costrange)))
1927
1952
1928 def domixed():
1953 def domixed():
1929 d = util.lrucachedict(size)
1954 d = util.lrucachedict(size)
1930
1955
1931 for op, v in mixedops:
1956 for op, v, cost in mixedops:
1932 if op == 0:
1957 if op == 0:
1933 try:
1958 try:
1934 d[v]
1959 d[v]
@@ -1937,14 +1962,36 b' def perflrucache(ui, size=4, gets=10000,'
1937 else:
1962 else:
1938 d[v] = v
1963 d[v] = v
1939
1964
1965 def domixedcost():
1966 d = util.lrucachedict(size, maxcost=costlimit)
1967
1968 for op, v, cost in mixedops:
1969 if op == 0:
1970 try:
1971 d[v]
1972 except KeyError:
1973 pass
1974 else:
1975 d.insert(v, v, cost=cost)
1976
1940 benches = [
1977 benches = [
1941 (doinit, b'init'),
1978 (doinit, b'init'),
1942 (dogets, b'gets'),
1943 (doinserts, b'inserts'),
1944 (dosets, b'sets'),
1945 (domixed, b'mixed')
1946 ]
1979 ]
1947
1980
1981 if costlimit:
1982 benches.extend([
1983 (dogetscost, b'gets w/ cost limit'),
1984 (doinsertscost, b'inserts w/ cost limit'),
1985 (domixedcost, b'mixed w/ cost limit'),
1986 ])
1987 else:
1988 benches.extend([
1989 (dogets, b'gets'),
1990 (doinserts, b'inserts'),
1991 (dosets, b'sets'),
1992 (domixed, b'mixed')
1993 ])
1994
1948 for fn, title in benches:
1995 for fn, title in benches:
1949 timer, fm = gettimer(ui, opts)
1996 timer, fm = gettimer(ui, opts)
1950 timer(fn, title=title)
1997 timer(fn, title=title)
@@ -1240,8 +1240,14 b' class lrucachedict(object):'
1240 Items in the cache can be inserted with an optional "cost" value. This is
1240 Items in the cache can be inserted with an optional "cost" value. This is
1241 simply an integer that is specified by the caller. The cache can be queried
1241 simply an integer that is specified by the caller. The cache can be queried
1242 for the total cost of all items presently in the cache.
1242 for the total cost of all items presently in the cache.
1243
1244 The cache can also define a maximum cost. If a cache insertion would
1245 cause the total cost of the cache to go beyond the maximum cost limit,
1246 nodes will be evicted to make room for the new code. This can be used
1247 to e.g. set a max memory limit and associate an estimated bytes size
1248 cost to each item in the cache. By default, no maximum cost is enforced.
1243 """
1249 """
1244 def __init__(self, max):
1250 def __init__(self, max, maxcost=0):
1245 self._cache = {}
1251 self._cache = {}
1246
1252
1247 self._head = head = _lrucachenode()
1253 self._head = head = _lrucachenode()
@@ -1250,6 +1256,7 b' class lrucachedict(object):'
1250 self._size = 1
1256 self._size = 1
1251 self.capacity = max
1257 self.capacity = max
1252 self.totalcost = 0
1258 self.totalcost = 0
1259 self.maxcost = maxcost
1253
1260
1254 def __len__(self):
1261 def __len__(self):
1255 return len(self._cache)
1262 return len(self._cache)
@@ -1279,6 +1286,10 b' class lrucachedict(object):'
1279 node.cost = cost
1286 node.cost = cost
1280 self.totalcost += cost
1287 self.totalcost += cost
1281 self._movetohead(node)
1288 self._movetohead(node)
1289
1290 if self.maxcost:
1291 self._enforcecostlimit()
1292
1282 return
1293 return
1283
1294
1284 if self._size < self.capacity:
1295 if self._size < self.capacity:
@@ -1301,6 +1312,9 b' class lrucachedict(object):'
1301 # is already self._head.prev.
1312 # is already self._head.prev.
1302 self._head = node
1313 self._head = node
1303
1314
1315 if self.maxcost:
1316 self._enforcecostlimit()
1317
1304 def __setitem__(self, k, v):
1318 def __setitem__(self, k, v):
1305 self.insert(k, v)
1319 self.insert(k, v)
1306
1320
@@ -1331,7 +1345,7 b' class lrucachedict(object):'
1331
1345
1332 self._cache.clear()
1346 self._cache.clear()
1333
1347
1334 def copy(self, capacity=None):
1348 def copy(self, capacity=None, maxcost=0):
1335 """Create a new cache as a copy of the current one.
1349 """Create a new cache as a copy of the current one.
1336
1350
1337 By default, the new cache has the same capacity as the existing one.
1351 By default, the new cache has the same capacity as the existing one.
@@ -1343,7 +1357,8 b' class lrucachedict(object):'
1343 """
1357 """
1344
1358
1345 capacity = capacity or self.capacity
1359 capacity = capacity or self.capacity
1346 result = lrucachedict(capacity)
1360 maxcost = maxcost or self.maxcost
1361 result = lrucachedict(capacity, maxcost=maxcost)
1347
1362
1348 # We copy entries by iterating in oldest-to-newest order so the copy
1363 # We copy entries by iterating in oldest-to-newest order so the copy
1349 # has the correct ordering.
1364 # has the correct ordering.
@@ -1445,6 +1460,13 b' class lrucachedict(object):'
1445 self._size += 1
1460 self._size += 1
1446 return node
1461 return node
1447
1462
1463 def _enforcecostlimit(self):
1464 # This should run after an insertion. It should only be called if total
1465 # cost limits are being enforced.
1466 # The most recently inserted node is never evicted.
1467 while len(self) > 1 and self.totalcost > self.maxcost:
1468 self.popoldest()
1469
1448 def lrucachefunc(func):
1470 def lrucachefunc(func):
1449 '''cache most recent results of function calls'''
1471 '''cache most recent results of function calls'''
1450 cache = {}
1472 cache = {}
@@ -133,6 +133,22 b' class testlrucachedict(unittest.TestCase'
133 for key in ('a', 'b', 'c', 'd'):
133 for key in ('a', 'b', 'c', 'd'):
134 self.assertEqual(d[key], 'v%s' % key)
134 self.assertEqual(d[key], 'v%s' % key)
135
135
136 d = util.lrucachedict(4, maxcost=42)
137 d.insert('a', 'va', cost=5)
138 d.insert('b', 'vb', cost=4)
139 d.insert('c', 'vc', cost=3)
140 dc = d.copy()
141 self.assertEqual(dc.maxcost, 42)
142 self.assertEqual(len(dc), 3)
143
144 # Max cost can be lowered as part of copy.
145 dc = d.copy(maxcost=10)
146 self.assertEqual(dc.maxcost, 10)
147 self.assertEqual(len(dc), 2)
148 self.assertEqual(dc.totalcost, 7)
149 self.assertIn('b', dc)
150 self.assertIn('c', dc)
151
136 def testcopydecreasecapacity(self):
152 def testcopydecreasecapacity(self):
137 d = util.lrucachedict(5)
153 d = util.lrucachedict(5)
138 d.insert('a', 'va', cost=4)
154 d.insert('a', 'va', cost=4)
@@ -217,5 +233,93 b' class testlrucachedict(unittest.TestCase'
217 d['a'] = 'va'
233 d['a'] = 'va'
218 self.assertEqual(d.popoldest(), ('b', 'vb'))
234 self.assertEqual(d.popoldest(), ('b', 'vb'))
219
235
236 def testmaxcost(self):
237 # Item cost is zero by default.
238 d = util.lrucachedict(6, maxcost=10)
239 d['a'] = 'va'
240 d['b'] = 'vb'
241 d['c'] = 'vc'
242 d['d'] = 'vd'
243 self.assertEqual(len(d), 4)
244 self.assertEqual(d.totalcost, 0)
245
246 d.clear()
247
248 # Insertion to exact cost threshold works without eviction.
249 d.insert('a', 'va', cost=6)
250 d.insert('b', 'vb', cost=4)
251
252 self.assertEqual(len(d), 2)
253 self.assertEqual(d['a'], 'va')
254 self.assertEqual(d['b'], 'vb')
255
256 # Inserting a new element with 0 cost works.
257 d['c'] = 'vc'
258 self.assertEqual(len(d), 3)
259
260 # Inserting a new element with cost putting us above high
261 # water mark evicts oldest single item.
262 d.insert('d', 'vd', cost=1)
263 self.assertEqual(len(d), 3)
264 self.assertEqual(d.totalcost, 5)
265 self.assertNotIn('a', d)
266 for key in ('b', 'c', 'd'):
267 self.assertEqual(d[key], 'v%s' % key)
268
269 # Inserting a new element with enough room for just itself
270 # evicts all items before.
271 d.insert('e', 've', cost=10)
272 self.assertEqual(len(d), 1)
273 self.assertEqual(d.totalcost, 10)
274 self.assertIn('e', d)
275
276 # Inserting a new element with cost greater than threshold
277 # still retains that item.
278 d.insert('f', 'vf', cost=11)
279 self.assertEqual(len(d), 1)
280 self.assertEqual(d.totalcost, 11)
281 self.assertIn('f', d)
282
283 # Inserting a new element will evict the last item since it is
284 # too large.
285 d['g'] = 'vg'
286 self.assertEqual(len(d), 1)
287 self.assertEqual(d.totalcost, 0)
288 self.assertIn('g', d)
289
290 d.clear()
291
292 d.insert('a', 'va', cost=7)
293 d.insert('b', 'vb', cost=3)
294 self.assertEqual(len(d), 2)
295
296 # Replacing a value with smaller cost won't result in eviction.
297 d.insert('b', 'vb2', cost=2)
298 self.assertEqual(len(d), 2)
299
300 # Replacing a value with a higher cost will evict when threshold
301 # exceeded.
302 d.insert('b', 'vb3', cost=4)
303 self.assertEqual(len(d), 1)
304 self.assertNotIn('a', d)
305
306 def testmaxcostcomplex(self):
307 d = util.lrucachedict(100, maxcost=100)
308 d.insert('a', 'va', cost=9)
309 d.insert('b', 'vb', cost=21)
310 d.insert('c', 'vc', cost=7)
311 d.insert('d', 'vc', cost=50)
312 self.assertEqual(d.totalcost, 87)
313
314 # Inserting new element should free multiple elements so we hit
315 # low water mark.
316 d.insert('e', 'vd', cost=25)
317 self.assertEqual(len(d), 3)
318 self.assertNotIn('a', d)
319 self.assertNotIn('b', d)
320 self.assertIn('c', d)
321 self.assertIn('d', d)
322 self.assertIn('e', d)
323
220 if __name__ == '__main__':
324 if __name__ == '__main__':
221 silenttestrunner.main(__name__)
325 silenttestrunner.main(__name__)
General Comments 0
You need to be logged in to leave comments. Login now