##// END OF EJS Templates
pickleshare: hget, hset, hdict (for write efficient hash bucket file storage)
vivainio -
Show More
@@ -1,267 +1,306 b''
1 1 #!/usr/bin/env python
2 2
3 3 """ PickleShare - a small 'shelve' like datastore with concurrency support
4 4
5 5 Like shelve, a PickleShareDB object acts like a normal dictionary. Unlike
6 6 shelve, many processes can access the database simultaneously. Changing a
7 7 value in database is immediately visible to other processes accessing the
8 8 same database.
9 9
10 10 Concurrency is possible because the values are stored in separate files. Hence
11 11 the "database" is a directory where *all* files are governed by PickleShare.
12 12
13 13 Example usage::
14 14
15 15 from pickleshare import *
16 16 db = PickleShareDB('~/testpickleshare')
17 17 db.clear()
18 18 print "Should be empty:",db.items()
19 19 db['hello'] = 15
20 20 db['aku ankka'] = [1,2,313]
21 21 db['paths/are/ok/key'] = [1,(5,46)]
22 22 print db.keys()
23 23 del db['aku ankka']
24 24
25 25 This module is certainly not ZODB, but can be used for low-load
26 26 (non-mission-critical) situations where tiny code size trumps the
27 27 advanced features of a "real" object database.
28 28
29 29 Installation guide: easy_install pickleshare
30 30
31 31 Author: Ville Vainio <vivainio@gmail.com>
32 32 License: MIT open source license.
33 33
34 34 """
35 35
36 36 from path import path as Path
37 37 import os,stat,time
38 38 import cPickle as pickle
39 39 import UserDict
40 40 import warnings
41 41 import glob
42 42
def gethashfile(key):
    """ Map a key to one of 256 two-hex-digit bucket file names. """
    bucket = hash(key) % 256
    return "%02x" % abs(bucket)
45
43 46 class PickleShareDB(UserDict.DictMixin):
44 47 """ The main 'connection' object for PickleShare database """
45 48 def __init__(self,root):
46 49 """ Return a db object that will manage the specied directory"""
47 50 self.root = Path(root).expanduser().abspath()
48 51 if not self.root.isdir():
49 52 self.root.makedirs()
50 53 # cache has { 'key' : (obj, orig_mod_time) }
51 54 self.cache = {}
52 55
53 56 def __getitem__(self,key):
54 57 """ db['key'] reading """
55 58 fil = self.root / key
56 59 try:
57 60 mtime = (fil.stat()[stat.ST_MTIME])
58 61 except OSError:
59 62 raise KeyError(key)
60 63
61 64 if fil in self.cache and mtime == self.cache[fil][1]:
62 65 return self.cache[fil][0]
63 66 try:
64 67 # The cached item has expired, need to read
65 68 obj = pickle.load(fil.open())
66 69 except:
67 70 raise KeyError(key)
68 71
69 72 self.cache[fil] = (obj,mtime)
70 73 return obj
71 74
72 75 def __setitem__(self,key,value):
73 76 """ db['key'] = 5 """
74 77 fil = self.root / key
75 78 parent = fil.parent
76 79 if parent and not parent.isdir():
77 80 parent.makedirs()
78 81 pickled = pickle.dump(value,fil.open('w'))
79 82 try:
80 83 self.cache[fil] = (value,fil.mtime)
81 84 except OSError,e:
82 85 if e.errno != 2:
83 86 raise
84 87
88 def hset(self, hashroot, key, value):
89 hroot = self.root / hashroot
90 if not hroot.isdir():
91 hroot.makedirs()
92 hfile = hroot / gethashfile(key)
93 d = self.get(hfile, {})
94 d.update( {key : value})
95 self[hfile] = d
96
97 def hget(self, hashroot, key, default = None):
98 hroot = self.root / hashroot
99 hfile = hroot / gethashfile(key)
100 d = self.get(hfile, None)
101 #print "got dict",d,"from",hfile
102 if d is None:
103 return default
104 return d.get(key, default)
105
106 def hdict(self, hashroot):
107 buckets = self.keys(hashroot + "/*")
108 hfiles = [f for f in buckets]
109 all = {}
110 for f in hfiles:
111 # print "using",f
112 all.update(self[f])
113 self.uncache(f)
114
115 return all
116
85 117 def __delitem__(self,key):
86 118 """ del db["key"] """
87 119 fil = self.root / key
88 120 self.cache.pop(fil,None)
89 121 try:
90 122 fil.remove()
91 123 except OSError:
92 124 # notfound and permission denied are ok - we
93 125 # lost, the other process wins the conflict
94 126 pass
95 127
96 128 def _normalized(self, p):
97 129 """ Make a key suitable for user's eyes """
98 130 return str(self.root.relpathto(p)).replace('\\','/')
99 131
100 132 def keys(self, globpat = None):
101 133 """ All keys in DB, or all keys matching a glob"""
102 134
103 135 if globpat is None:
104 136 files = self.root.walkfiles()
105 137 else:
106 138 files = [Path(p) for p in glob.glob(self.root/globpat)]
107 139 return [self._normalized(p) for p in files if p.isfile()]
108 140
109 141 def uncache(self,*items):
110 142 """ Removes all, or specified items from cache
111 143
112 144 Use this after reading a large amount of large objects
113 145 to free up memory, when you won't be needing the objects
114 146 for a while.
115 147
116 148 """
117 149 if not items:
118 150 self.cache = {}
119 151 for it in items:
120 152 self.cache.pop(it,None)
121 153
122 154 def waitget(self,key, maxwaittime = 60 ):
123 155 """ Wait (poll) for a key to get a value
124 156
125 157 Will wait for `maxwaittime` seconds before raising a KeyError.
126 158 The call exits normally if the `key` field in db gets a value
127 159 within the timeout period.
128 160
129 161 Use this for synchronizing different processes or for ensuring
130 162 that an unfortunately timed "db['key'] = newvalue" operation
131 163 in another process (which causes all 'get' operation to cause a
132 164 KeyError for the duration of pickling) won't screw up your program
133 165 logic.
134 166 """
135 167
136 168 wtimes = [0.2] * 3 + [0.5] * 2 + [1]
137 169 tries = 0
138 170 waited = 0
139 171 while 1:
140 172 try:
141 173 val = self[key]
142 174 return val
143 175 except KeyError:
144 176 pass
145 177
146 178 if waited > maxwaittime:
147 179 raise KeyError(key)
148 180
149 181 time.sleep(wtimes[tries])
150 182 waited+=wtimes[tries]
151 183 if tries < len(wtimes) -1:
152 184 tries+=1
153 185
154 186 def getlink(self,folder):
155 187 """ Get a convenient link for accessing items """
156 188 return PickleShareLink(self, folder)
157 189
158 190 def __repr__(self):
159 191 return "PickleShareDB('%s')" % self.root
160 192
161 193
162 194
class PickleShareLink:
    """ A shorthand for accessing nested PickleShare data conveniently.

    Created through PickleShareDB.getlink(), example::

        lnk = db.getlink('myobjects/test')
        lnk.foo = 2
        lnk.bar = lnk.foo + 5

    """
    def __init__(self, db, keydir ):
        # NOTE: locals() here is {'self': self, 'db': db, 'keydir': keydir},
        # so __dict__ also gains a 'self' entry (a deliberate shortcut that
        # creates a reference cycle). Storing straight into __dict__ avoids
        # triggering the overridden __setattr__ below.
        self.__dict__.update(locals())

    def __getattr__(self,key):
        # Called only for attributes NOT found in __dict__: delegate the
        # read to the db under '<keydir>/<key>'.
        return self.__dict__['db'][self.__dict__['keydir']+'/' + key]
    def __setattr__(self,key,val):
        # Every attribute write becomes a db entry under '<keydir>/<key>'.
        # (self.db / self.keydir resolve directly via __dict__, so this
        # does not recurse.)
        self.db[self.keydir+'/' + key] = val
    def __repr__(self):
        db = self.__dict__['db']
        keys = db.keys( self.__dict__['keydir'] +"/*")
        return "<PickleShareLink '%s': %s>" % (
            self.__dict__['keydir'],
            ";".join([Path(k).basename() for k in keys]))
186 218
187 219
def test():
    """ Smoke-test plain and hashed storage plus links (prints results). """
    db = PickleShareDB('~/testpickleshare')
    db.clear()
    print "Should be empty:",db.items()
    db['hello'] = 15
    db['aku ankka'] = [1,2,313]
    db['paths/nest/ok/keyname'] = [1,(5,46)]
    # hashed-bucket storage: many keys share one bucket file
    db.hset('hash', 'aku', 12)
    db.hset('hash', 'ankka', 313)
    print "12 =",db.hget('hash','aku')
    print "313 =",db.hget('hash','ankka')
    print "all hashed",db.hdict('hash')
    print db.keys()
    print db.keys('paths/nest/ok/k*')
    print dict(db) # snapshot of whole db
    db.uncache() # frees memory, causes re-reads later

    # shorthand for accessing deeply nested files
    lnk = db.getlink('myobjects/test')
    lnk.foo = 2
    lnk.bar = lnk.foo + 5
    print lnk.bar # 7
205 242
def stress():
    """ Concurrency stress test.

    Run several of these processes against the same db directory at once
    to exercise the lost-file / conflicting-write handling.
    """
    db = PickleShareDB('~/fsdbtest')
    import time,sys
    for i in range(1000):
        for j in range(1000):
            if i % 15 == 0 and i < 200:
                # early phase: delete keys too, to provoke races where a
                # reader sees a vanished file
                if str(j) in db:
                    del db[str(j)]
                continue

            if j%33 == 0:
                # brief pause so competing processes can interleave
                time.sleep(0.02)

            db[str(j)] = db.get(str(j), []) + [(i,j,"proc %d" % os.getpid())]
            db.hset('hash',j, db.hget('hash',j,15) + 1 )

        print i,
        sys.stdout.flush()
        if i % 10 == 0:
            db.uncache()
224 263
225 264 def main():
226 265 import textwrap
227 266 usage = textwrap.dedent("""\
228 267 pickleshare - manage PickleShare databases
229 268
230 269 Usage:
231 270
232 271 pickleshare dump /path/to/db > dump.txt
233 272 pickleshare load /path/to/db < dump.txt
234 273 pickleshare test /path/to/db
235 274 """)
236 275 DB = PickleShareDB
237 276 import sys
238 277 if len(sys.argv) < 2:
239 278 print usage
240 279 return
241 280
242 281 cmd = sys.argv[1]
243 282 args = sys.argv[2:]
244 283 if cmd == 'dump':
245 284 if not args: args= ['.']
246 285 db = DB(args[0])
247 286 import pprint
248 287 pprint.pprint(db.items())
249 288 elif cmd == 'load':
250 289 cont = sys.stdin.read()
251 290 db = DB(args[0])
252 291 data = eval(cont)
253 292 db.clear()
254 293 for k,v in db.items():
255 294 db[k] = v
256 295 elif cmd == 'testwait':
257 296 db = DB(args[0])
258 297 db.clear()
259 298 print db.waitget('250')
260 299 elif cmd == 'test':
261 300 test()
262 301 stress()
263 302
264 303 if __name__== "__main__":
265 304 main()
266 305
267 306 No newline at end of file
General Comments 0
You need to be logged in to leave comments. Login now