rm extra print
vivainio
@@ -1,353 +1,354 @@
#!/usr/bin/env python

""" PickleShare - a small 'shelve' like datastore with concurrency support

Like shelve, a PickleShareDB object acts like a normal dictionary. Unlike
shelve, many processes can access the database simultaneously. Changing a
value in the database is immediately visible to other processes accessing the
same database.

Concurrency is possible because the values are stored in separate files. Hence
the "database" is a directory where *all* files are governed by PickleShare.

Example usage::

    from pickleshare import *
    db = PickleShareDB('~/testpickleshare')
    db.clear()
    print "Should be empty:",db.items()
    db['hello'] = 15
    db['aku ankka'] = [1,2,313]
    db['paths/are/ok/key'] = [1,(5,46)]
    print db.keys()
    del db['aku ankka']

This module is certainly not ZODB, but can be used for low-load
(non-mission-critical) situations where tiny code size trumps the
advanced features of a "real" object database.

Installation guide: easy_install pickleshare

Author: Ville Vainio <vivainio@gmail.com>
License: MIT open source license.

"""

from path import path as Path
import os,stat,time
import cPickle as pickle
import UserDict
import warnings
import glob

from sets import Set as set

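# --- Editor's sketch, not part of the original module: a minimal
# illustration of the docstring's claim that changes are immediately visible
# to other readers of the same directory. Two independent connections stand
# in for two processes; every key is just a pickle file on disk. The
# '~/testpickleshare' path is borrowed from the docstring example.
def _demo_shared_visibility():
    a = PickleShareDB('~/testpickleshare')
    b = PickleShareDB('~/testpickleshare')  # e.g. opened by another process
    a['greeting'] = 'hello'
    print b['greeting']                     # -> 'hello', read from disk
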
def gethashfile(key):
    # bucket filename: two hex digits, so keys spread over at most 256 files
    return ("%02x" % abs(hash(key) % 256))[-2:]

_sentinel = object()

class PickleShareDB(UserDict.DictMixin):
    """ The main 'connection' object for PickleShare database """
    def __init__(self,root):
        """ Return a db object that will manage the specified directory"""
        self.root = Path(root).expanduser().abspath()
        if not self.root.isdir():
            self.root.makedirs()
        # cache has { 'key' : (obj, orig_mod_time) }
        self.cache = {}


    def __getitem__(self,key):
        """ db['key'] reading """
        fil = self.root / key
        try:
            mtime = (fil.stat()[stat.ST_MTIME])
        except OSError:
            raise KeyError(key)

        if fil in self.cache and mtime == self.cache[fil][1]:
            return self.cache[fil][0]
        try:
            # The cached item has expired, need to read
            obj = pickle.load(fil.open())
        except:
            # any unpickling failure is reported as a missing key
            raise KeyError(key)

        self.cache[fil] = (obj,mtime)
        return obj

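    # --- Editor's note, not in the original file: self.cache is keyed by
    # the absolute file path and invalidated by mtime, so a value is
    # unpickled at most once per on-disk modification. A commented sketch:
    #
    #   db = PickleShareDB('~/testpickleshare')
    #   db['x'] = 1                   # cached as (1, mtime)
    #   time.sleep(1)                 # step past 1-second mtime resolution
    #   pickle.dump(2, (db.root / 'x').open('w'))  # simulate another writer
    #   print db['x']                 # mtime differs -> re-read, prints 2
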
    def __setitem__(self,key,value):
        """ db['key'] = 5 """
        fil = self.root / key
        parent = fil.parent
        if parent and not parent.isdir():
            parent.makedirs()
        pickle.dump(value,fil.open('w'))
        try:
            self.cache[fil] = (value,fil.mtime)
        except OSError,e:
            # errno 2 (ENOENT): the file vanished before we could stat it,
            # i.e. another process won a delete race - that is acceptable
            if e.errno != 2:
                raise

    def hset(self, hashroot, key, value):
        """ hashed set """
        hroot = self.root / hashroot
        if not hroot.isdir():
            hroot.makedirs()
        hfile = hroot / gethashfile(key)
        d = self.get(hfile, {})
        d.update( {key : value})
        self[hfile] = d



    def hget(self, hashroot, key, default = _sentinel, fast_only = True):
        """ hashed get """
        hroot = self.root / hashroot
        hfile = hroot / gethashfile(key)

        d = self.get(hfile, _sentinel )
        #print "got dict",d,"from",hfile
        if d is _sentinel:
            if fast_only:
                if default is _sentinel:
                    raise KeyError(key)

                return default

            # slow mode ok, works even after hcompress()
            d = self.hdict(hashroot)

        return d.get(key, default)

    def hdict(self, hashroot):
        """ Get all data contained in hashed category 'hashroot' as dict """
        hfiles = self.keys(hashroot + "/*")
+       hfiles.sort()
        last = len(hfiles) and hfiles[-1] or ''
        if last.endswith('xx'):
-           print "using xx"
+           # print "using xx"
            hfiles = [last] + hfiles[:-1]

        all = {}

        for f in hfiles:
            # print "using",f
            all.update(self[f])
            self.uncache(f)

        return all

    def hcompress(self, hashroot):
        """ Compress category 'hashroot', so hset is fast again

        hget will fail if fast_only is True for compressed items (that were
        hset before hcompress).

        """
        hfiles = self.keys(hashroot + "/*")
        all = {}
        for f in hfiles:
            # print "using",f
            all.update(self[f])
            self.uncache(f)

        self[hashroot + '/xx'] = all
        for f in hfiles:
            p = self.root / f
            if p.basename() == 'xx':
                continue
            p.remove()

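    # --- Editor's note, not in the original file: typical use of the hashed
    # bucket API above. hset()/hget() spread the keys of one 'hashroot' over
    # at most 256 small dict files; hcompress() merges them into a single
    # 'xx' file, after which only slow-mode reads can still find them:
    #
    #   db = PickleShareDB('~/testpickleshare')
    #   db.hset('scores', 'aku', 12)      # lands in scores/<bucket>
    #   print db.hget('scores', 'aku')    # -> 12, one small file read
    #   db.hcompress('scores')            # merge buckets into scores/xx
    #   print db.hget('scores', 'aku', fast_only=False)  # -> 12, via hdict()
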
    def __delitem__(self,key):
        """ del db["key"] """
        fil = self.root / key
        self.cache.pop(fil,None)
        try:
            fil.remove()
        except OSError:
            # notfound and permission denied are ok - we
            # lost, the other process wins the conflict
            pass

    def _normalized(self, p):
        """ Make a key suitable for user's eyes """
        return str(self.root.relpathto(p)).replace('\\','/')

    def keys(self, globpat = None):
        """ All keys in DB, or all keys matching a glob"""

        if globpat is None:
            files = self.root.walkfiles()
        else:
            files = [Path(p) for p in glob.glob(self.root/globpat)]
        return [self._normalized(p) for p in files if p.isfile()]

    def uncache(self,*items):
        """ Removes all, or specified items from cache

        Use this after reading a large amount of large objects
        to free up memory, when you won't be needing the objects
        for a while.

        """
        if not items:
            self.cache = {}
        for it in items:
            self.cache.pop(it,None)

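    # --- Editor's note, not in the original file: the intended pattern for
    # uncache() after a bulk scan, so the pickled objects can be garbage
    # collected instead of lingering in self.cache (process() below is a
    # placeholder, not part of this module):
    #
    #   for k in db.keys():
    #       process(db[k])    # each read populates the cache
    #   db.uncache()          # drop every cached object at once
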
    def waitget(self,key, maxwaittime = 60 ):
        """ Wait (poll) for a key to get a value

        Will wait for `maxwaittime` seconds before raising a KeyError.
        The call exits normally if the `key` field in db gets a value
        within the timeout period.

        Use this for synchronizing different processes or for ensuring
        that an unfortunately timed "db['key'] = newvalue" operation
        in another process (which causes all 'get' operations to raise a
        KeyError for the duration of pickling) won't screw up your program
        logic.
        """

        wtimes = [0.2] * 3 + [0.5] * 2 + [1]
        tries = 0
        waited = 0
        while 1:
            try:
                val = self[key]
                return val
            except KeyError:
                pass

            if waited > maxwaittime:
                raise KeyError(key)

            time.sleep(wtimes[tries])
            waited+=wtimes[tries]
            if tries < len(wtimes) -1:
                tries+=1

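    # --- Editor's sketch, not in the original file: waitget() as a crude
    # rendezvous between a producer and a consumer process (POSIX os.fork
    # assumed here purely for brevity):
    #
    #   db = PickleShareDB('~/testpickleshare')
    #   if os.fork() == 0:            # child: the producer
    #       time.sleep(2)
    #       db['result'] = 42
    #       os._exit(0)
    #   print db.waitget('result')    # parent polls with backoff -> 42
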
    def getlink(self,folder):
        """ Get a convenient link for accessing items """
        return PickleShareLink(self, folder)

    def __repr__(self):
        return "PickleShareDB('%s')" % self.root



class PickleShareLink:
    """ A shorthand for accessing nested PickleShare data conveniently.

    Created through PickleShareDB.getlink(), example::

        lnk = db.getlink('myobjects/test')
        lnk.foo = 2
        lnk.bar = lnk.foo + 5

    """
    def __init__(self, db, keydir ):
        self.__dict__.update(locals())

    def __getattr__(self,key):
        return self.__dict__['db'][self.__dict__['keydir']+'/' + key]
    def __setattr__(self,key,val):
        self.db[self.keydir+'/' + key] = val
    def __repr__(self):
        db = self.__dict__['db']
        keys = db.keys( self.__dict__['keydir'] +"/*")
        return "<PickleShareLink '%s': %s>" % (
            self.__dict__['keydir'],
            ";".join([Path(k).basename() for k in keys]))

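# --- Editor's note, not part of the original module: attribute access on a
# link is just key access under its folder, so the docstring example above
# stores plain db keys:
#
#   lnk = db.getlink('myobjects/test')
#   lnk.foo = 2                    # same as db['myobjects/test/foo'] = 2
#   print db['myobjects/test/foo'] # -> 2
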
def test():
    db = PickleShareDB('~/testpickleshare')
    db.clear()
    print "Should be empty:",db.items()
    db['hello'] = 15
    db['aku ankka'] = [1,2,313]
    db['paths/nest/ok/keyname'] = [1,(5,46)]
    db.hset('hash', 'aku', 12)
    db.hset('hash', 'ankka', 313)
    print "12 =",db.hget('hash','aku')
    print "313 =",db.hget('hash','ankka')
    print "all hashed",db.hdict('hash')
    print db.keys()
    print db.keys('paths/nest/ok/k*')
    print dict(db) # snapshot of whole db
    db.uncache() # frees memory, causes re-reads later

    # shorthand for accessing deeply nested files
    lnk = db.getlink('myobjects/test')
    lnk.foo = 2
    lnk.bar = lnk.foo + 5
    print lnk.bar # 7

def stress():
    db = PickleShareDB('~/fsdbtest')
    import time,sys
    for i in range(1000):
        for j in range(1000):
            if i % 15 == 0 and i < 200:
                if str(j) in db:
                    del db[str(j)]
                continue

            if j%33 == 0:
                time.sleep(0.02)

            db[str(j)] = db.get(str(j), []) + [(i,j,"proc %d" % os.getpid())]
            db.hset('hash',j, db.hget('hash',j,15) + 1 )

        print i,
        sys.stdout.flush()
        if i % 10 == 0:
            db.uncache()

def main():
    import textwrap
    usage = textwrap.dedent("""\
    pickleshare - manage PickleShare databases

    Usage:

        pickleshare dump /path/to/db > dump.txt
        pickleshare load /path/to/db < dump.txt
        pickleshare test /path/to/db
    """)
    DB = PickleShareDB
    import sys
    if len(sys.argv) < 2:
        print usage
        return

    cmd = sys.argv[1]
    args = sys.argv[2:]
    if cmd == 'dump':
        if not args: args= ['.']
        db = DB(args[0])
        import pprint
        pprint.pprint(db.items())
    elif cmd == 'load':
        cont = sys.stdin.read()
        db = DB(args[0])
        data = eval(cont)
        db.clear()
        # iterate the parsed dump, not the freshly cleared db
        for k,v in data.items():
            db[k] = v
    elif cmd == 'testwait':
        db = DB(args[0])
        db.clear()
        print db.waitget('250')
    elif cmd == 'test':
        test()
        stress()

if __name__== "__main__":
    main()
\ No newline at end of file