##// END OF EJS Templates
pickleshare: hget, hset, hdict (for write-efficient hash-bucket file storage)
vivainio -
Show More
@@ -1,267 +1,306 b''
1 #!/usr/bin/env python
1 #!/usr/bin/env python
2
2
3 """ PickleShare - a small 'shelve' like datastore with concurrency support
3 """ PickleShare - a small 'shelve' like datastore with concurrency support
4
4
5 Like shelve, a PickleShareDB object acts like a normal dictionary. Unlike
5 Like shelve, a PickleShareDB object acts like a normal dictionary. Unlike
6 shelve, many processes can access the database simultaneously. Changing a
6 shelve, many processes can access the database simultaneously. Changing a
7 value in database is immediately visible to other processes accessing the
7 value in database is immediately visible to other processes accessing the
8 same database.
8 same database.
9
9
10 Concurrency is possible because the values are stored in separate files. Hence
10 Concurrency is possible because the values are stored in separate files. Hence
11 the "database" is a directory where *all* files are governed by PickleShare.
11 the "database" is a directory where *all* files are governed by PickleShare.
12
12
13 Example usage::
13 Example usage::
14
14
15 from pickleshare import *
15 from pickleshare import *
16 db = PickleShareDB('~/testpickleshare')
16 db = PickleShareDB('~/testpickleshare')
17 db.clear()
17 db.clear()
18 print "Should be empty:",db.items()
18 print "Should be empty:",db.items()
19 db['hello'] = 15
19 db['hello'] = 15
20 db['aku ankka'] = [1,2,313]
20 db['aku ankka'] = [1,2,313]
21 db['paths/are/ok/key'] = [1,(5,46)]
21 db['paths/are/ok/key'] = [1,(5,46)]
22 print db.keys()
22 print db.keys()
23 del db['aku ankka']
23 del db['aku ankka']
24
24
25 This module is certainly not ZODB, but can be used for low-load
25 This module is certainly not ZODB, but can be used for low-load
26 (non-mission-critical) situations where tiny code size trumps the
26 (non-mission-critical) situations where tiny code size trumps the
27 advanced features of a "real" object database.
27 advanced features of a "real" object database.
28
28
29 Installation guide: easy_install pickleshare
29 Installation guide: easy_install pickleshare
30
30
31 Author: Ville Vainio <vivainio@gmail.com>
31 Author: Ville Vainio <vivainio@gmail.com>
32 License: MIT open source license.
32 License: MIT open source license.
33
33
34 """
34 """
35
35
36 from path import path as Path
36 from path import path as Path
37 import os,stat,time
37 import os,stat,time
38 import cPickle as pickle
38 import cPickle as pickle
39 import UserDict
39 import UserDict
40 import warnings
40 import warnings
41 import glob
41 import glob
42
42
def gethashfile(key):
    """ Return the two-character hex bucket file name ('00'..'ff') for `key`.

    Buckets are derived from hash(key) so that many keys map onto a
    bounded number of files under a hash root (used by PickleShareDB's
    hset/hget/hdict methods).
    """
    # hash(key) % 256 is already in 0..255 (Python's % with a positive
    # modulus never yields a negative result), so the original abs()
    # call and the [-2:] slice were redundant no-ops.
    return "%02x" % (hash(key) % 256)
class PickleShareDB(UserDict.DictMixin):
    """ The main 'connection' object for PickleShare database

    Acts like a dict; every key is a relative path under `root` and
    every value is a pickle file at that path, so separate processes
    can share the same database concurrently.
    """
    def __init__(self,root):
        """ Return a db object that will manage the specified directory"""
        self.root = Path(root).expanduser().abspath()
        if not self.root.isdir():
            self.root.makedirs()
        # cache has { 'key' : (obj, orig_mod_time) }
        self.cache = {}

    def __getitem__(self,key):
        """ db['key'] reading

        Returns the unpickled object stored in file `root/key`, served
        from the in-memory cache when the file's mtime is unchanged.
        Raises KeyError when the file is missing or cannot be read.
        """
        fil = self.root / key
        try:
            mtime = (fil.stat()[stat.ST_MTIME])
        except OSError:
            # stat failed => no such file; present it as a missing key
            raise KeyError(key)

        if fil in self.cache and mtime == self.cache[fil][1]:
            return self.cache[fil][0]
        try:
            # The cached item has expired, need to read
            obj = pickle.load(fil.open())
        except:
            # bare except is deliberate best-effort: a half-written file
            # (another process pickling right now) surfaces as KeyError
            raise KeyError(key)

        self.cache[fil] = (obj,mtime)
        return obj

    def __setitem__(self,key,value):
        """ db['key'] = 5

        Pickles `value` into file `root/key`, creating parent
        directories as needed, then records the new mtime in the cache.
        """
        fil = self.root / key
        parent = fil.parent
        if parent and not parent.isdir():
            parent.makedirs()
        # NOTE(review): pickle.dump returns None, so 'pickled' is unused;
        # the file handle from fil.open('w') is closed only by refcounting
        pickled = pickle.dump(value,fil.open('w'))
        try:
            self.cache[fil] = (value,fil.mtime)
        except OSError,e:
            # errno 2 (ENOENT): another process removed the file between
            # our write and the mtime read - it wins the conflict
            if e.errno != 2:
                raise

    def hset(self, hashroot, key, value):
        """ hashed set: store `key` -> `value` in a shared bucket file
        under directory `hashroot`, so many small entries do not each
        need their own file """
        hroot = self.root / hashroot
        if not hroot.isdir():
            hroot.makedirs()
        hfile = hroot / gethashfile(key)
        d = self.get(hfile, {})
        d.update( {key : value})
        self[hfile] = d

    def hget(self, hashroot, key, default = None):
        """ hashed get: return the value stored for `key` under
        `hashroot`, or `default` when the bucket or key is absent """
        hroot = self.root / hashroot
        hfile = hroot / gethashfile(key)
        d = self.get(hfile, None)
        #print "got dict",d,"from",hfile
        if d is None:
            return default
        return d.get(key, default)

    def hdict(self, hashroot):
        """ Return all hashed entries under `hashroot` as one dict.

        Reads every bucket file and merges them; each bucket is
        uncached after use to keep memory consumption low.
        """
        buckets = self.keys(hashroot + "/*")
        hfiles = [f for f in buckets]
        # NOTE(review): 'all' shadows the builtin; kept as-is
        all = {}
        for f in hfiles:
            # print "using",f
            all.update(self[f])
            self.uncache(f)

        return all

    def __delitem__(self,key):
        """ del db["key"] """
        fil = self.root / key
        self.cache.pop(fil,None)
        try:
            fil.remove()
        except OSError:
            # notfound and permission denied are ok - we
            # lost, the other process wins the conflict
            pass

    def _normalized(self, p):
        """ Make a key suitable for user's eyes """
        # relative to root, with forward slashes even on Windows
        return str(self.root.relpathto(p)).replace('\\','/')

    def keys(self, globpat = None):
        """ All keys in DB, or all keys matching a glob"""

        if globpat is None:
            files = self.root.walkfiles()
        else:
            files = [Path(p) for p in glob.glob(self.root/globpat)]
        return [self._normalized(p) for p in files if p.isfile()]

    def uncache(self,*items):
        """ Removes all, or specified items from cache

        Use this after reading a large amount of large objects
        to free up memory, when you won't be needing the objects
        for a while.

        """
        if not items:
            self.cache = {}
        for it in items:
            self.cache.pop(it,None)

    def waitget(self,key, maxwaittime = 60 ):
        """ Wait (poll) for a key to get a value

        Will wait for `maxwaittime` seconds before raising a KeyError.
        The call exits normally if the `key` field in db gets a value
        within the timeout period.

        Use this for synchronizing different processes or for ensuring
        that an unfortunately timed "db['key'] = newvalue" operation
        in another process (which causes all 'get' operation to cause a
        KeyError for the duration of pickling) won't screw up your program
        logic.
        """

        # back off gradually: 3 x 0.2s, 2 x 0.5s, then 1s per poll
        wtimes = [0.2] * 3 + [0.5] * 2 + [1]
        tries = 0
        waited = 0
        while 1:
            try:
                val = self[key]
                return val
            except KeyError:
                pass

            if waited > maxwaittime:
                raise KeyError(key)

            time.sleep(wtimes[tries])
            waited+=wtimes[tries]
            if tries < len(wtimes) -1:
                tries+=1

    def getlink(self,folder):
        """ Get a convenient link for accessing items """
        return PickleShareLink(self, folder)

    def __repr__(self):
        return "PickleShareDB('%s')" % self.root
160
192
161
193
162
194
class PickleShareLink:
    """ A shorthand for accessing nested PickleShare data conveniently.

    Created through PickleShareDB.getlink(), example::

        lnk = db.getlink('myobjects/test')
        lnk.foo = 2
        lnk.bar = lnk.foo + 5

    """
    def __init__(self, db, keydir ):
        # Store db/keydir via __dict__ directly: a normal attribute
        # assignment would be intercepted by __setattr__ below and
        # written into the database instead. Note that locals() also
        # stores a 'self' entry - kept for compatibility.
        self.__dict__.update(locals())

    def __getattr__(self,key):
        # attribute read -> database lookup under 'keydir/<key>'
        return self.__dict__['db'][self.__dict__['keydir']+'/' + key]
    def __setattr__(self,key,val):
        # attribute write -> database store under 'keydir/<key>';
        # self.db/self.keydir resolve via __dict__, so no recursion
        self.db[self.keydir+'/' + key] = val
    def __repr__(self):
        db = self.__dict__['db']
        keys = db.keys( self.__dict__['keydir'] +"/*")
        return "<PickleShareLink '%s': %s>" % (
            self.__dict__['keydir'],
            ";".join([Path(k).basename() for k in keys]))
186
218
187
219
188 def test():
220 def test():
189 db = PickleShareDB('~/testpickleshare')
221 db = PickleShareDB('~/testpickleshare')
190 db.clear()
222 db.clear()
191 print "Should be empty:",db.items()
223 print "Should be empty:",db.items()
192 db['hello'] = 15
224 db['hello'] = 15
193 db['aku ankka'] = [1,2,313]
225 db['aku ankka'] = [1,2,313]
194 db['paths/nest/ok/keyname'] = [1,(5,46)]
226 db['paths/nest/ok/keyname'] = [1,(5,46)]
227 db.hset('hash', 'aku', 12)
228 db.hset('hash', 'ankka', 313)
229 print "12 =",db.hget('hash','aku')
230 print "313 =",db.hget('hash','ankka')
231 print "all hashed",db.hdict('hash')
195 print db.keys()
232 print db.keys()
196 print db.keys('paths/nest/ok/k*')
233 print db.keys('paths/nest/ok/k*')
197 print dict(db) # snapsot of whole db
234 print dict(db) # snapsot of whole db
198 db.uncache() # frees memory, causes re-reads later
235 db.uncache() # frees memory, causes re-reads later
199
236
200 # shorthand for accessing deeply nested files
237 # shorthand for accessing deeply nested files
201 lnk = db.getlink('myobjects/test')
238 lnk = db.getlink('myobjects/test')
202 lnk.foo = 2
239 lnk.foo = 2
203 lnk.bar = lnk.foo + 5
240 lnk.bar = lnk.foo + 5
204 print lnk.bar # 7
241 print lnk.bar # 7
205
242
206 def stress():
243 def stress():
207 db = PickleShareDB('~/fsdbtest')
244 db = PickleShareDB('~/fsdbtest')
208 import time,sys
245 import time,sys
209 for i in range(1000):
246 for i in range(1000):
210 for j in range(300):
247 for j in range(1000):
211 if i % 15 == 0 and i < 200:
248 if i % 15 == 0 and i < 200:
212 if str(j) in db:
249 if str(j) in db:
213 del db[str(j)]
250 del db[str(j)]
214 continue
251 continue
215
252
216 if j%33 == 0:
253 if j%33 == 0:
217 time.sleep(0.02)
254 time.sleep(0.02)
218
255
219 db[str(j)] = db.get(str(j), []) + [(i,j,"proc %d" % os.getpid())]
256 db[str(j)] = db.get(str(j), []) + [(i,j,"proc %d" % os.getpid())]
257 db.hset('hash',j, db.hget('hash',j,15) + 1 )
258
220 print i,
259 print i,
221 sys.stdout.flush()
260 sys.stdout.flush()
222 if i % 10 == 0:
261 if i % 10 == 0:
223 db.uncache()
262 db.uncache()
224
263
225 def main():
264 def main():
226 import textwrap
265 import textwrap
227 usage = textwrap.dedent("""\
266 usage = textwrap.dedent("""\
228 pickleshare - manage PickleShare databases
267 pickleshare - manage PickleShare databases
229
268
230 Usage:
269 Usage:
231
270
232 pickleshare dump /path/to/db > dump.txt
271 pickleshare dump /path/to/db > dump.txt
233 pickleshare load /path/to/db < dump.txt
272 pickleshare load /path/to/db < dump.txt
234 pickleshare test /path/to/db
273 pickleshare test /path/to/db
235 """)
274 """)
236 DB = PickleShareDB
275 DB = PickleShareDB
237 import sys
276 import sys
238 if len(sys.argv) < 2:
277 if len(sys.argv) < 2:
239 print usage
278 print usage
240 return
279 return
241
280
242 cmd = sys.argv[1]
281 cmd = sys.argv[1]
243 args = sys.argv[2:]
282 args = sys.argv[2:]
244 if cmd == 'dump':
283 if cmd == 'dump':
245 if not args: args= ['.']
284 if not args: args= ['.']
246 db = DB(args[0])
285 db = DB(args[0])
247 import pprint
286 import pprint
248 pprint.pprint(db.items())
287 pprint.pprint(db.items())
249 elif cmd == 'load':
288 elif cmd == 'load':
250 cont = sys.stdin.read()
289 cont = sys.stdin.read()
251 db = DB(args[0])
290 db = DB(args[0])
252 data = eval(cont)
291 data = eval(cont)
253 db.clear()
292 db.clear()
254 for k,v in db.items():
293 for k,v in db.items():
255 db[k] = v
294 db[k] = v
256 elif cmd == 'testwait':
295 elif cmd == 'testwait':
257 db = DB(args[0])
296 db = DB(args[0])
258 db.clear()
297 db.clear()
259 print db.waitget('250')
298 print db.waitget('250')
260 elif cmd == 'test':
299 elif cmd == 'test':
261 test()
300 test()
262 stress()
301 stress()
263
302
264 if __name__== "__main__":
303 if __name__== "__main__":
265 main()
304 main()
266
305
267 No newline at end of file
306
General Comments 0
You need to be logged in to leave comments. Login now