tests: use bytes and %d formatting in test-remotefilelog-datapack.py...
Gregory Szorc
r41612:26832569 default
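
Background for the change below: under Python 3, %-formatting of bytes literals (PEP 461, added in Python 3.5) accepts %d for integers, but %s only accepts bytes-like operands. The Python 2 idiom "foo%s" % i therefore breaks once the literals become bytes, while b"foo%d" % i works both on Python 2 (where bytes is str) and on Python 3.5+. A minimal illustration, not part of the commit:

    # Illustrative only; assumes Python 3.5+ bytes %-formatting (PEP 461).
    i = 7
    assert b"foo%d" % i == b"foo7"      # %d formats ints into bytes directly
    try:
        b"foo%s" % i                    # %s wants a bytes-like operand...
    except TypeError:
        pass                            # ...so the old str idiom fails here
    assert b"foo%s" % b"7" == b"foo7"   # bytes operands still work with %s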
@@ -1,378 +1,379 @@
 #!/usr/bin/env python
 from __future__ import absolute_import, print_function
 
 import hashlib
 import os
 import random
 import shutil
 import stat
 import struct
 import sys
 import tempfile
 import time
 import unittest
 
 import silenttestrunner
 
 # Load the local remotefilelog, not the system one
 sys.path[0:0] = [os.path.join(os.path.dirname(__file__), '..')]
 from mercurial.node import nullid
 from mercurial import (
     pycompat,
     ui as uimod,
 )
 from hgext.remotefilelog import (
     basepack,
     constants,
     datapack,
 )
 
 class datapacktestsbase(object):
     def __init__(self, datapackreader, paramsavailable):
         self.datapackreader = datapackreader
         self.paramsavailable = paramsavailable
 
     def setUp(self):
         self.tempdirs = []
 
     def tearDown(self):
         for d in self.tempdirs:
             shutil.rmtree(d)
 
     def makeTempDir(self):
         tempdir = tempfile.mkdtemp()
         self.tempdirs.append(tempdir)
         return tempdir
 
     def getHash(self, content):
         return hashlib.sha1(content).digest()
 
     def getFakeHash(self):
         return ''.join(chr(random.randint(0, 255)) for _ in range(20))
 
     def createPack(self, revisions=None, packdir=None):
         if revisions is None:
-            revisions = [("filename", self.getFakeHash(), nullid, "content")]
+            revisions = [(b"filename", self.getFakeHash(), nullid, b"content")]
 
         if packdir is None:
             packdir = self.makeTempDir()
 
         packer = datapack.mutabledatapack(uimod.ui(), packdir, version=2)
 
         for args in revisions:
             filename, node, base, content = args[0:4]
             # meta is optional
             meta = None
             if len(args) > 4:
                 meta = args[4]
             packer.add(filename, node, base, content, metadata=meta)
 
         path = packer.close()
         return self.datapackreader(path)
 
     def _testAddSingle(self, content):
         """Test putting a simple blob into a pack and reading it out.
         """
-        filename = "foo"
+        filename = b"foo"
         node = self.getHash(content)
 
         revisions = [(filename, node, nullid, content)]
         pack = self.createPack(revisions)
         if self.paramsavailable:
             self.assertEqual(pack.params.fanoutprefix,
                              basepack.SMALLFANOUTPREFIX)
 
         chain = pack.getdeltachain(filename, node)
         self.assertEqual(content, chain[0][4])
 
     def testAddSingle(self):
-        self._testAddSingle('')
+        self._testAddSingle(b'')
 
     def testAddSingleEmpty(self):
-        self._testAddSingle('abcdef')
+        self._testAddSingle(b'abcdef')
 
     def testAddMultiple(self):
         """Test putting multiple unrelated blobs into a pack and reading them
         out.
         """
         revisions = []
         for i in range(10):
-            filename = "foo%s" % i
-            content = "abcdef%s" % i
+            filename = b"foo%d" % i
+            content = b"abcdef%d" % i
             node = self.getHash(content)
             revisions.append((filename, node, self.getFakeHash(), content))
 
         pack = self.createPack(revisions)
 
         for filename, node, base, content in revisions:
             entry = pack.getdelta(filename, node)
             self.assertEqual((content, filename, base, {}), entry)
 
             chain = pack.getdeltachain(filename, node)
             self.assertEqual(content, chain[0][4])
 
     def testAddDeltas(self):
         """Test putting multiple delta blobs into a pack and read the chain.
         """
         revisions = []
-        filename = "foo"
+        filename = b"foo"
         lastnode = nullid
         for i in range(10):
-            content = "abcdef%s" % i
+            content = b"abcdef%d" % i
             node = self.getHash(content)
             revisions.append((filename, node, lastnode, content))
             lastnode = node
 
         pack = self.createPack(revisions)
 
         entry = pack.getdelta(filename, revisions[0][1])
         realvalue = (revisions[0][3], filename, revisions[0][2], {})
         self.assertEqual(entry, realvalue)
 
         # Test that the chain for the final entry has all the others
         chain = pack.getdeltachain(filename, node)
         for i in range(10):
-            content = "abcdef%s" % i
+            content = b"abcdef%d" % i
             self.assertEqual(content, chain[-i - 1][4])
 
     def testPackMany(self):
         """Pack many related and unrelated objects.
         """
         # Build a random pack file
         revisions = []
         blobs = {}
         random.seed(0)
         for i in range(100):
-            filename = "filename-%s" % i
+            filename = b"filename-%d" % i
             filerevs = []
             for j in range(random.randint(1, 100)):
-                content = "content-%s" % j
+                content = b"content-%d" % j
                 node = self.getHash(content)
                 lastnode = nullid
                 if len(filerevs) > 0:
                     lastnode = filerevs[random.randint(0, len(filerevs) - 1)]
                 filerevs.append(node)
                 blobs[(filename, node, lastnode)] = content
                 revisions.append((filename, node, lastnode, content))
 
         pack = self.createPack(revisions)
 
         # Verify the pack contents
         for (filename, node, lastnode), content in sorted(blobs.iteritems()):
             chain = pack.getdeltachain(filename, node)
             for entry in chain:
                 expectedcontent = blobs[(entry[0], entry[1], entry[3])]
                 self.assertEqual(entry[4], expectedcontent)
 
     def testPackMetadata(self):
         revisions = []
         for i in range(100):
-            filename = '%s.txt' % i
-            content = 'put-something-here \n' * i
+            filename = b'%d.txt' % i
+            content = b'put-something-here \n' * i
             node = self.getHash(content)
             meta = {constants.METAKEYFLAG: i ** 4,
                     constants.METAKEYSIZE: len(content),
-                    'Z': 'random_string',
-                    '_': '\0' * i}
+                    b'Z': b'random_string',
+                    b'_': b'\0' * i}
             revisions.append((filename, node, nullid, content, meta))
         pack = self.createPack(revisions)
         for name, node, x, content, origmeta in revisions:
             parsedmeta = pack.getmeta(name, node)
             # flag == 0 should be optimized out
             if origmeta[constants.METAKEYFLAG] == 0:
                 del origmeta[constants.METAKEYFLAG]
             self.assertEqual(parsedmeta, origmeta)
 
     def testGetMissing(self):
         """Test the getmissing() api.
         """
         revisions = []
-        filename = "foo"
+        filename = b"foo"
         lastnode = nullid
         for i in range(10):
-            content = "abcdef%s" % i
+            content = b"abcdef%d" % i
             node = self.getHash(content)
             revisions.append((filename, node, lastnode, content))
             lastnode = node
 
         pack = self.createPack(revisions)
 
-        missing = pack.getmissing([("foo", revisions[0][1])])
+        missing = pack.getmissing([(b"foo", revisions[0][1])])
         self.assertFalse(missing)
 
-        missing = pack.getmissing([("foo", revisions[0][1]),
-                                   ("foo", revisions[1][1])])
+        missing = pack.getmissing([(b"foo", revisions[0][1]),
+                                   (b"foo", revisions[1][1])])
         self.assertFalse(missing)
 
         fakenode = self.getFakeHash()
-        missing = pack.getmissing([("foo", revisions[0][1]), ("foo", fakenode)])
-        self.assertEqual(missing, [("foo", fakenode)])
+        missing = pack.getmissing([(b"foo", revisions[0][1]),
+                                   (b"foo", fakenode)])
+        self.assertEqual(missing, [(b"foo", fakenode)])
 
     def testAddThrows(self):
         pack = self.createPack()
 
         try:
-            pack.add('filename', nullid, 'contents')
+            pack.add(b'filename', nullid, b'contents')
             self.assertTrue(False, "datapack.add should throw")
         except RuntimeError:
             pass
 
     def testBadVersionThrows(self):
         pack = self.createPack()
-        path = pack.path + '.datapack'
+        path = pack.path + b'.datapack'
         with open(path) as f:
             raw = f.read()
         raw = struct.pack('!B', 255) + raw[1:]
         os.chmod(path, os.stat(path).st_mode | stat.S_IWRITE)
         with open(path, 'w+') as f:
             f.write(raw)
 
         try:
             pack = self.datapackreader(pack.path)
             self.assertTrue(False, "bad version number should have thrown")
         except RuntimeError:
             pass
 
     def testMissingDeltabase(self):
         fakenode = self.getFakeHash()
-        revisions = [("filename", fakenode, self.getFakeHash(), "content")]
+        revisions = [(b"filename", fakenode, self.getFakeHash(), b"content")]
         pack = self.createPack(revisions)
-        chain = pack.getdeltachain("filename", fakenode)
+        chain = pack.getdeltachain(b"filename", fakenode)
         self.assertEqual(len(chain), 1)
 
     def testLargePack(self):
         """Test creating and reading from a large pack with over X entries.
         This causes it to use a 2^16 fanout table instead."""
         revisions = []
         blobs = {}
         total = basepack.SMALLFANOUTCUTOFF + 1
         for i in pycompat.xrange(total):
-            filename = "filename-%s" % i
+            filename = b"filename-%d" % i
             content = filename
             node = self.getHash(content)
             blobs[(filename, node)] = content
             revisions.append((filename, node, nullid, content))
 
         pack = self.createPack(revisions)
         if self.paramsavailable:
             self.assertEqual(pack.params.fanoutprefix,
                              basepack.LARGEFANOUTPREFIX)
 
         for (filename, node), content in blobs.iteritems():
             actualcontent = pack.getdeltachain(filename, node)[0][4]
             self.assertEqual(actualcontent, content)
 
     def testPacksCache(self):
         """Test that we remember the most recent packs while fetching the delta
         chain."""
 
         packdir = self.makeTempDir()
         deltachains = []
 
         numpacks = 10
         revisionsperpack = 100
 
         for i in range(numpacks):
             chain = []
-            revision = (str(i), self.getFakeHash(), nullid, "content")
+            revision = (b'%d' % i, self.getFakeHash(), nullid, b"content")
 
             for _ in range(revisionsperpack):
                 chain.append(revision)
                 revision = (
-                    str(i),
+                    b'%d' % i,
                     self.getFakeHash(),
                     revision[1],
                     self.getFakeHash()
                 )
 
             self.createPack(chain, packdir)
             deltachains.append(chain)
 
         class testdatapackstore(datapack.datapackstore):
             # Ensures that we are not keeping everything in the cache.
             DEFAULTCACHESIZE = numpacks / 2
 
         store = testdatapackstore(uimod.ui(), packdir)
 
         random.shuffle(deltachains)
         for randomchain in deltachains:
             revision = random.choice(randomchain)
             chain = store.getdeltachain(revision[0], revision[1])
 
             mostrecentpack = next(iter(store.packs), None)
             self.assertEqual(
                 mostrecentpack.getdeltachain(revision[0], revision[1]),
                 chain
             )
 
             self.assertEqual(randomchain.index(revision) + 1, len(chain))
 
     # perf test off by default since it's slow
     def _testIndexPerf(self):
         random.seed(0)
         print("Multi-get perf test")
         packsizes = [
             100,
             10000,
             100000,
             500000,
             1000000,
             3000000,
         ]
         lookupsizes = [
             10,
             100,
             1000,
             10000,
             100000,
             1000000,
         ]
         for packsize in packsizes:
             revisions = []
             for i in pycompat.xrange(packsize):
-                filename = "filename-%s" % i
-                content = "content-%s" % i
+                filename = b"filename-%d" % i
+                content = b"content-%d" % i
                 node = self.getHash(content)
                 revisions.append((filename, node, nullid, content))
 
             path = self.createPack(revisions).path
 
             # Perf of large multi-get
             import gc
             gc.disable()
             pack = self.datapackreader(path)
             for lookupsize in lookupsizes:
                 if lookupsize > packsize:
                     continue
                 random.shuffle(revisions)
                 findnodes = [(rev[0], rev[1]) for rev in revisions]
 
                 start = time.time()
                 pack.getmissing(findnodes[:lookupsize])
                 elapsed = time.time() - start
-                print ("%s pack %s lookups = %0.04f" %
-                       (('%s' % packsize).rjust(7),
-                        ('%s' % lookupsize).rjust(7),
+                print ("%s pack %d lookups = %0.04f" %
+                       (('%d' % packsize).rjust(7),
+                        ('%d' % lookupsize).rjust(7),
                         elapsed))
 
             print("")
             gc.enable()
 
         # The perf test is meant to produce output, so we always fail the test
         # so the user sees the output.
         raise RuntimeError("perf test always fails")
 
 class datapacktests(datapacktestsbase, unittest.TestCase):
     def __init__(self, *args, **kwargs):
         datapacktestsbase.__init__(self, datapack.datapack, True)
         unittest.TestCase.__init__(self, *args, **kwargs)
 
 # TODO:
 # datapack store:
 # - getmissing
 # - GC two packs into one
 
 if __name__ == '__main__':
     if pycompat.iswindows:
         sys.exit(80) # Skip on Windows
     silenttestrunner.main(__name__)
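
For readers skimming the diff, the pack write/read round-trip that createPack() and _testAddSingle() exercise condenses to roughly the following sketch. It uses only calls that appear in the test itself; running it standalone still requires a Mercurial checkout with the remotefilelog extension on sys.path, and cleanup of the temp directory is omitted:

    # Condensed from createPack()/_testAddSingle() above; illustrative only.
    import hashlib, tempfile
    from mercurial import ui as uimod
    from mercurial.node import nullid
    from hgext.remotefilelog import datapack

    content = b'content'
    node = hashlib.sha1(content).digest()       # nodes are 20-byte binary hashes

    packer = datapack.mutabledatapack(uimod.ui(), tempfile.mkdtemp(), version=2)
    packer.add(b'foo', node, nullid, content)   # filenames and contents as bytes
    path = packer.close()

    pack = datapack.datapack(path)
    chain = pack.getdeltachain(b'foo', node)
    assert chain[0][4] == content               # entry[4] holds the delta text
    assert not pack.getmissing([(b'foo', node)])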