Show More
@@ -1,267 +1,306 b'' | |||||
1 | #!/usr/bin/env python |
|
1 | #!/usr/bin/env python | |
2 |
|
2 | |||
3 | """ PickleShare - a small 'shelve' like datastore with concurrency support |
|
3 | """ PickleShare - a small 'shelve' like datastore with concurrency support | |
4 |
|
4 | |||
5 | Like shelve, a PickleShareDB object acts like a normal dictionary. Unlike |
|
5 | Like shelve, a PickleShareDB object acts like a normal dictionary. Unlike | |
6 | shelve, many processes can access the database simultaneously. Changing a |
|
6 | shelve, many processes can access the database simultaneously. Changing a | |
7 | value in database is immediately visible to other processes accessing the |
|
7 | value in database is immediately visible to other processes accessing the | |
8 | same database. |
|
8 | same database. | |
9 |
|
9 | |||
10 | Concurrency is possible because the values are stored in separate files. Hence |
|
10 | Concurrency is possible because the values are stored in separate files. Hence | |
11 | the "database" is a directory where *all* files are governed by PickleShare. |
|
11 | the "database" is a directory where *all* files are governed by PickleShare. | |
12 |
|
12 | |||
13 | Example usage:: |
|
13 | Example usage:: | |
14 |
|
14 | |||
15 | from pickleshare import * |
|
15 | from pickleshare import * | |
16 | db = PickleShareDB('~/testpickleshare') |
|
16 | db = PickleShareDB('~/testpickleshare') | |
17 | db.clear() |
|
17 | db.clear() | |
18 | print "Should be empty:",db.items() |
|
18 | print "Should be empty:",db.items() | |
19 | db['hello'] = 15 |
|
19 | db['hello'] = 15 | |
20 | db['aku ankka'] = [1,2,313] |
|
20 | db['aku ankka'] = [1,2,313] | |
21 | db['paths/are/ok/key'] = [1,(5,46)] |
|
21 | db['paths/are/ok/key'] = [1,(5,46)] | |
22 | print db.keys() |
|
22 | print db.keys() | |
23 | del db['aku ankka'] |
|
23 | del db['aku ankka'] | |
24 |
|
24 | |||
25 | This module is certainly not ZODB, but can be used for low-load |
|
25 | This module is certainly not ZODB, but can be used for low-load | |
26 | (non-mission-critical) situations where tiny code size trumps the |
|
26 | (non-mission-critical) situations where tiny code size trumps the | |
27 | advanced features of a "real" object database. |
|
27 | advanced features of a "real" object database. | |
28 |
|
28 | |||
29 | Installation guide: easy_install pickleshare |
|
29 | Installation guide: easy_install pickleshare | |
30 |
|
30 | |||
31 | Author: Ville Vainio <vivainio@gmail.com> |
|
31 | Author: Ville Vainio <vivainio@gmail.com> | |
32 | License: MIT open source license. |
|
32 | License: MIT open source license. | |
33 |
|
33 | |||
34 | """ |
|
34 | """ | |
35 |
|
35 | |||
36 | from path import path as Path |
|
36 | from path import path as Path | |
37 | import os,stat,time |
|
37 | import os,stat,time | |
38 | import cPickle as pickle |
|
38 | import cPickle as pickle | |
39 | import UserDict |
|
39 | import UserDict | |
40 | import warnings |
|
40 | import warnings | |
41 | import glob |
|
41 | import glob | |
42 |
|
42 | |||
|
def gethashfile(key):
    """ Return the two-digit hex bucket name ('00'..'ff') used for `key`

    The bucket is the built-in hash of the key modulo 256, so a hashed
    category is spread over at most 256 bucket files.

    NOTE: this relies on hash() giving the same value in every process
    that shares the database; string hash randomization (python -R /
    PYTHONHASHSEED) would make processes disagree on the bucket.
    """
    # A positive modulus always yields 0..255, which "%02x" renders as
    # exactly two hex digits.
    return "%02x" % (hash(key) % 256)
|
45 | ||||
class PickleShareDB(UserDict.DictMixin):
    """ The main 'connection' object for PickleShare database

    Every key is stored as one pickle file below `root`; a key that
    contains '/' ends up in nested subdirectories.  The remaining dict
    methods (get, items, clear, ...) come from UserDict.DictMixin.
    """
    def __init__(self,root):
        """ Return a db object that will manage the specified directory"""
        self.root = Path(root).expanduser().abspath()
        if not self.root.isdir():
            self.root.makedirs()
        # cache has { file path under root : (obj, orig_mod_time) }
        self.cache = {}

    def __getitem__(self,key):
        """ db['key'] reading

        Raises KeyError when the file is missing or cannot be unpickled
        (e.g. while another process is still writing it).
        """
        fil = self.root / key
        try:
            mtime = (fil.stat()[stat.ST_MTIME])
        except OSError:
            raise KeyError(key)

        if fil in self.cache and mtime == self.cache[fil][1]:
            return self.cache[fil][0]
        try:
            # The cached item has expired, need to read
            with fil.open() as f:
                obj = pickle.load(f)
        except Exception:
            # Not a bare except: KeyboardInterrupt / SystemExit must not
            # be turned into a KeyError (waitget polls this in a loop).
            raise KeyError(key)

        self.cache[fil] = (obj,mtime)
        return obj

    def __setitem__(self,key,value):
        """ db['key'] = 5 """
        import errno
        fil = self.root / key
        parent = fil.parent
        if parent and not parent.isdir():
            parent.makedirs()
        # Close the file before looking at its mtime so that the data is
        # flushed and the timestamp is final.
        with fil.open('w') as f:
            pickle.dump(value, f)
        try:
            # NOTE(review): fil.mtime presumably is a float while
            # __getitem__ compares against the integer ST_MTIME, so this
            # entry may never produce a cache hit - verify against path.py.
            self.cache[fil] = (value,fil.mtime)
        except OSError as e:
            # The file may already have been deleted by another process;
            # that is fine, anything else is a real error.
            if e.errno != errno.ENOENT:
                raise

    def hset(self, hashroot, key, value):
        """ Store `value` under `key` in the hashed category `hashroot`

        All keys that fall into the same bucket (see gethashfile) share
        one file holding a dict; the read-modify-write done here is not
        protected against concurrent writers.
        """
        hroot = self.root / hashroot
        if not hroot.isdir():
            hroot.makedirs()
        hfile = hroot / gethashfile(key)
        d = self.get(hfile, {})
        d[key] = value
        self[hfile] = d

    def hget(self, hashroot, key, default = None):
        """ Return the value stored with hset(), or `default` if missing """
        hroot = self.root / hashroot
        hfile = hroot / gethashfile(key)
        d = self.get(hfile, None)
        if d is None:
            return default
        return d.get(key, default)

    def hdict(self, hashroot):
        """ Return all data of the hashed category `hashroot` as one dict """
        merged = {}
        for f in self.keys(hashroot + "/*"):
            merged.update(self[f])
            # The buckets were only needed for merging, don't keep them
            # in memory.
            self.uncache(f)

        return merged

    def __delitem__(self,key):
        """ del db["key"] """
        fil = self.root / key
        self.cache.pop(fil,None)
        try:
            fil.remove()
        except OSError:
            # notfound and permission denied are ok - we
            # lost, the other process wins the conflict
            pass

    def _normalized(self, p):
        """ Make a key suitable for user's eyes """
        return str(self.root.relpathto(p)).replace('\\','/')

    def keys(self, globpat = None):
        """ All keys in DB, or all keys matching a glob"""

        if globpat is None:
            files = self.root.walkfiles()
        else:
            files = [Path(p) for p in glob.glob(self.root/globpat)]
        return [self._normalized(p) for p in files if p.isfile()]

    def uncache(self,*items):
        """ Removes all, or specified items from cache

        Use this after reading a large amount of large objects
        to free up memory, when you won't be needing the objects
        for a while.

        Items are keys as used with db[key].
        """
        if not items:
            self.cache = {}
        for it in items:
            # The cache is keyed by the file path, built the same way as
            # in __getitem__ / __setitem__.
            self.cache.pop(self.root / it,None)

    def waitget(self,key, maxwaittime = 60 ):
        """ Wait (poll) for a key to get a value

        Will wait for `maxwaittime` seconds before raising a KeyError.
        The call exits normally if the `key` field in db gets a value
        within the timeout period.

        Use this for synchronizing different processes or for ensuring
        that an unfortunately timed "db['key'] = newvalue" operation
        in another process (which causes all 'get' operation to cause a
        KeyError for the duration of pickling) won't screw up your program
        logic.
        """

        # Poll quickly at first, then settle on one attempt per second.
        wtimes = [0.2] * 3 + [0.5] * 2 + [1]
        tries = 0
        waited = 0
        while True:
            try:
                return self[key]
            except KeyError:
                pass

            if waited > maxwaittime:
                raise KeyError(key)

            time.sleep(wtimes[tries])
            waited+=wtimes[tries]
            if tries < len(wtimes) -1:
                tries+=1

    def getlink(self,folder):
        """ Get a convenient link for accessing items """
        return PickleShareLink(self, folder)

    def __repr__(self):
        return "PickleShareDB('%s')" % self.root
160 |
|
192 | |||
161 |
|
193 | |||
162 |
|
194 | |||
class PickleShareLink:
    """ A shorthand for accessing nested PickleShare data conveniently.

    Created through PickleShareDB.getlink(), example::

        lnk = db.getlink('myobjects/test')
        lnk.foo = 2
        lnk.bar = lnk.foo + 5

    Attribute `name` of the link maps to the db key "<keydir>/<name>".
    """
    def __init__(self, db, keydir ):
        # Go through __dict__: a plain "self.db = db" would be routed to
        # __setattr__ and written into the database instead.  Only these
        # two entries are stored (no reference back to self).
        self.__dict__.update(db=db, keydir=keydir)

    def __getattr__(self,key):
        # Only called for names not found in __dict__, i.e. everything
        # except 'db' and 'keydir'; a missing key raises KeyError.
        return self.__dict__['db'][self.__dict__['keydir']+'/' + key]

    def __setattr__(self,key,val):
        self.db[self.keydir+'/' + key] = val

    def __repr__(self):
        db = self.__dict__['db']
        keys = db.keys( self.__dict__['keydir'] +"/*")
        return "<PickleShareLink '%s': %s>" % (
            self.__dict__['keydir'],
            ";".join([Path(k).basename() for k in keys]))
186 |
|
218 | |||
187 |
|
219 | |||
def test():
    """ Smoke test of the basic API; uses (and clears) ~/testpickleshare """
    db = PickleShareDB('~/testpickleshare')
    db.clear()
    # Single pre-formatted argument: prints the same text whether print
    # is a statement or a function.
    print("Should be empty: %s" % (db.items(),))
    db['hello'] = 15
    db['aku ankka'] = [1,2,313]
    db['paths/nest/ok/keyname'] = [1,(5,46)]
    db.hset('hash', 'aku', 12)
    db.hset('hash', 'ankka', 313)
    print("12 = %s" % (db.hget('hash','aku'),))
    print("313 = %s" % (db.hget('hash','ankka'),))
    print("all hashed %s" % (db.hdict('hash'),))
    print(db.keys())
    print(db.keys('paths/nest/ok/k*'))
    print(dict(db)) # snapshot of whole db
    db.uncache() # frees memory, causes re-reads later

    # shorthand for accessing deeply nested files
    lnk = db.getlink('myobjects/test')
    lnk.foo = 2
    lnk.bar = lnk.foo + 5
    print(lnk.bar) # 7
205 |
|
242 | |||
def stress():
    """ Stress test on ~/fsdbtest, meant to be run from several processes

    Does 1000 rounds over the keys '0'..'999'.  On every 15th round below
    200 the keys are deleted; otherwise a record tagged with this
    process's pid is appended to each key and a counter in the 'hash'
    category is incremented.
    """
    db = PickleShareDB('~/fsdbtest')
    import time,sys
    for i in range(1000):
        for j in range(1000):
            if i % 15 == 0 and i < 200:
                if str(j) in db:
                    del db[str(j)]
                continue

            if j%33 == 0:
                time.sleep(0.02)

            db[str(j)] = db.get(str(j), []) + [(i,j,"proc %d" % os.getpid())]
            db.hset('hash',j, db.hget('hash',j,15) + 1 )

        # Progress indicator: round numbers on one line, shown immediately.
        sys.stdout.write("%d " % i)
        sys.stdout.flush()
        if i % 10 == 0:
            db.uncache()
224 |
|
263 | |||
def main():
    """ Command line entry point

    Dispatches on sys.argv[1]: 'dump', 'load', 'testwait' or 'test'.
    Prints the usage text when the command is missing or unknown, or
    when 'load' / 'testwait' is given without a database path.
    """
    import textwrap
    usage = textwrap.dedent("""\
    pickleshare - manage PickleShare databases

    Usage:

        pickleshare dump /path/to/db > dump.txt
        pickleshare load /path/to/db < dump.txt
        pickleshare test /path/to/db
    """)
    import sys
    if len(sys.argv) < 2:
        print(usage)
        return

    cmd = sys.argv[1]
    args = sys.argv[2:]
    if cmd == 'dump':
        if not args: args= ['.']
        db = PickleShareDB(args[0])
        import pprint
        pprint.pprint(db.items())
    elif cmd == 'load':
        if not args:
            print(usage)
            return
        cont = sys.stdin.read()
        db = PickleShareDB(args[0])
        # SECURITY NOTE: eval() executes whatever arrives on stdin; only
        # feed it dumps from a trusted source.
        data = eval(cont)
        if isinstance(data, dict):
            data = data.items()
        db.clear()
        # 'dump' writes a list of (key, value) pairs - restore those.
        for k,v in data:
            db[k] = v
    elif cmd == 'testwait':
        if not args:
            print(usage)
            return
        db = PickleShareDB(args[0])
        db.clear()
        print(db.waitget('250'))
    elif cmd == 'test':
        test()
        stress()
    else:
        print(usage)

if __name__== "__main__":
    main()
266 |
|
305 | |||
267 | No newline at end of file |
|
306 |
General Comments 0
You need to be logged in to leave comments.
Login now