@@ -1,458 +1,458 @@
from __future__ import absolute_import

import struct
import zlib

from mercurial.node import hex, nullid
from mercurial.i18n import _
from mercurial import (
    pycompat,
    util,
)
from . import (
    basepack,
    constants,
    shallowutil,
)

NODELENGTH = 20

# The indicator value in the index for a fulltext entry.
FULLTEXTINDEXMARK = -1
NOBASEINDEXMARK = -2

INDEXSUFFIX = '.dataidx'
PACKSUFFIX = '.datapack'

class datapackstore(basepack.basepackstore):
    INDEXSUFFIX = INDEXSUFFIX
    PACKSUFFIX = PACKSUFFIX

    def __init__(self, ui, path):
        super(datapackstore, self).__init__(ui, path)

    def getpack(self, path):
        return datapack(path)

    def get(self, name, node):
        raise RuntimeError("must use getdeltachain with datapackstore")

    def getmeta(self, name, node):
        for pack in self.packs:
            try:
                return pack.getmeta(name, node)
            except KeyError:
                pass

        for pack in self.refresh():
            try:
                return pack.getmeta(name, node)
            except KeyError:
                pass

        raise KeyError((name, hex(node)))

    def getdelta(self, name, node):
        for pack in self.packs:
            try:
                return pack.getdelta(name, node)
            except KeyError:
                pass

        for pack in self.refresh():
            try:
                return pack.getdelta(name, node)
            except KeyError:
                pass

        raise KeyError((name, hex(node)))

    def getdeltachain(self, name, node):
        for pack in self.packs:
            try:
                return pack.getdeltachain(name, node)
            except KeyError:
                pass

        for pack in self.refresh():
            try:
                return pack.getdeltachain(name, node)
            except KeyError:
                pass

        raise KeyError((name, hex(node)))

    def add(self, name, node, data):
        raise RuntimeError("cannot add to datapackstore")

class datapack(basepack.basepack):
    INDEXSUFFIX = INDEXSUFFIX
    PACKSUFFIX = PACKSUFFIX

    # Format is <node><delta offset><pack data offset><pack data size>
    # See the mutabledatapack doccomment for more details.
    INDEXFORMAT = '!20siQQ'
    INDEXENTRYLENGTH = 40

    SUPPORTED_VERSIONS = [2]

    def getmissing(self, keys):
        missing = []
        for name, node in keys:
            value = self._find(node)
            if not value:
                missing.append((name, node))

        return missing

    def get(self, name, node):
        raise RuntimeError("must use getdeltachain with datapack (%s:%s)"
                           % (name, hex(node)))

    def getmeta(self, name, node):
        value = self._find(node)
        if value is None:
            raise KeyError((name, hex(node)))

        node, deltabaseoffset, offset, size = value
        rawentry = self._data[offset:offset + size]

        # see docstring of mutabledatapack for the format
        offset = 0
        offset += struct.unpack_from('!H', rawentry, offset)[0] + 2  # filename
        offset += 40  # node, deltabase node
        offset += struct.unpack_from('!Q', rawentry, offset)[0] + 8  # delta

        metalen = struct.unpack_from('!I', rawentry, offset)[0]
        offset += 4

        meta = shallowutil.parsepackmeta(rawentry[offset:offset + metalen])

        return meta

    def getdelta(self, name, node):
        value = self._find(node)
        if value is None:
            raise KeyError((name, hex(node)))

        node, deltabaseoffset, offset, size = value
        entry = self._readentry(offset, size, getmeta=True)
        filename, node, deltabasenode, delta, meta = entry

        # If we've read a lot of data from the mmap, free some memory.
        self.freememory()

        return delta, filename, deltabasenode, meta

    def getdeltachain(self, name, node):
        value = self._find(node)
        if value is None:
            raise KeyError((name, hex(node)))

        params = self.params

        # Precompute chains
        chain = [value]
        deltabaseoffset = value[1]
        entrylen = self.INDEXENTRYLENGTH
        while (deltabaseoffset != FULLTEXTINDEXMARK
               and deltabaseoffset != NOBASEINDEXMARK):
            loc = params.indexstart + deltabaseoffset
            value = struct.unpack(self.INDEXFORMAT,
                                  self._index[loc:loc + entrylen])
            deltabaseoffset = value[1]
            chain.append(value)

        # Read chain data
        deltachain = []
        for node, deltabaseoffset, offset, size in chain:
            filename, node, deltabasenode, delta = self._readentry(offset, size)
            deltachain.append((filename, node, filename, deltabasenode, delta))

        # If we've read a lot of data from the mmap, free some memory.
        self.freememory()

        return deltachain

    def _readentry(self, offset, size, getmeta=False):
        rawentry = self._data[offset:offset + size]
        self._pagedin += len(rawentry)

        # <2 byte len> + <filename>
        lengthsize = 2
        filenamelen = struct.unpack('!H', rawentry[:2])[0]
        filename = rawentry[lengthsize:lengthsize + filenamelen]

        # <20 byte node> + <20 byte deltabase>
        nodestart = lengthsize + filenamelen
        deltabasestart = nodestart + NODELENGTH
        node = rawentry[nodestart:deltabasestart]
        deltabasenode = rawentry[deltabasestart:deltabasestart + NODELENGTH]

        # <8 byte len> + <delta>
        deltastart = deltabasestart + NODELENGTH
        rawdeltalen = rawentry[deltastart:deltastart + 8]
        deltalen = struct.unpack('!Q', rawdeltalen)[0]

        delta = rawentry[deltastart + 8:deltastart + 8 + deltalen]
        delta = self._decompress(delta)

        if getmeta:
            metastart = deltastart + 8 + deltalen
            metalen = struct.unpack_from('!I', rawentry, metastart)[0]

            rawmeta = rawentry[metastart + 4:metastart + 4 + metalen]
            meta = shallowutil.parsepackmeta(rawmeta)
            return filename, node, deltabasenode, delta, meta
        else:
            return filename, node, deltabasenode, delta

    def _decompress(self, data):
        return zlib.decompress(data)

    def add(self, name, node, data):
        raise RuntimeError("cannot add to datapack (%s:%s)" % (name, node))

    def _find(self, node):
        params = self.params
        fanoutkey = struct.unpack(params.fanoutstruct,
                                  node[:params.fanoutprefix])[0]
        fanout = self._fanouttable

        start = fanout[fanoutkey] + params.indexstart
        indexend = self._indexend

        # Scan forward to find the first non-same entry, which is the upper
        # bound.
        for i in pycompat.xrange(fanoutkey + 1, params.fanoutcount):
            end = fanout[i] + params.indexstart
            if end != start:
                break
        else:
            end = indexend

        # Bisect between start and end to find node
        index = self._index
        startnode = index[start:start + NODELENGTH]
        endnode = index[end:end + NODELENGTH]
        entrylen = self.INDEXENTRYLENGTH
        if startnode == node:
            entry = index[start:start + entrylen]
        elif endnode == node:
            entry = index[end:end + entrylen]
        else:
            while start < end - entrylen:
                mid = start + (end - start) // 2
                mid = mid - ((mid - params.indexstart) % entrylen)
                midnode = index[mid:mid + NODELENGTH]
                if midnode == node:
                    entry = index[mid:mid + entrylen]
                    break
                if node > midnode:
                    start = mid
                elif node < midnode:
                    end = mid
            else:
                return None

        return struct.unpack(self.INDEXFORMAT, entry)

    def markledger(self, ledger, options=None):
        for filename, node in self:
            ledger.markdataentry(self, filename, node)

    def cleanup(self, ledger):
        entries = ledger.sources.get(self, [])
        allkeys = set(self)
        repackedkeys = set((e.filename, e.node) for e in entries if
                           e.datarepacked or e.gced)

        if len(allkeys - repackedkeys) == 0:
            if self.path not in ledger.created:
                util.unlinkpath(self.indexpath, ignoremissing=True)
                util.unlinkpath(self.packpath, ignoremissing=True)

    def __iter__(self):
        for f, n, deltabase, deltalen in self.iterentries():
            yield f, n

    def iterentries(self):
        # Start at 1 to skip the header
        offset = 1
        data = self._data
        while offset < self.datasize:
            oldoffset = offset

            # <2 byte len> + <filename>
            filenamelen = struct.unpack('!H', data[offset:offset + 2])[0]
            offset += 2
            filename = data[offset:offset + filenamelen]
            offset += filenamelen

            # <20 byte node>
            node = data[offset:offset + constants.NODESIZE]
            offset += constants.NODESIZE
            # <20 byte deltabase>
            deltabase = data[offset:offset + constants.NODESIZE]
            offset += constants.NODESIZE

            # <8 byte len> + <delta>
            rawdeltalen = data[offset:offset + 8]
            deltalen = struct.unpack('!Q', rawdeltalen)[0]
            offset += 8

            # TODO(augie): we should store a header that is the
            # uncompressed size.
            uncompressedlen = len(self._decompress(
                data[offset:offset + deltalen]))
            offset += deltalen

            # <4 byte len> + <metadata-list>
            metalen = struct.unpack_from('!I', data, offset)[0]
            offset += 4 + metalen

            yield (filename, node, deltabase, uncompressedlen)

            # If we've read a lot of data from the mmap, free some memory.
            self._pagedin += offset - oldoffset
            if self.freememory():
                data = self._data

class mutabledatapack(basepack.mutablebasepack):
    """A class for constructing and serializing a datapack file and index.

    A datapack is a pair of files that contain the revision contents for
    various file revisions in Mercurial. It contains only revision contents
    (like file contents), not any history information.

    It consists of two files, with the following format. All bytes are in
    network byte order (big endian).

    .datapack
        The pack itself is a series of revision deltas with some basic header
        information on each. A revision delta may be a fulltext, represented
        by a deltabasenode equal to the nullid.

        datapack = <version: 1 byte>
                   [<revision>,...]
        revision = <filename len: 2 byte unsigned int>
                   <filename>
                   <node: 20 byte>
                   <deltabasenode: 20 byte>
                   <delta len: 8 byte unsigned int>
                   <delta>
                   <metadata-list len: 4 byte unsigned int> [1]
                   <metadata-list> [1]
        metadata-list = [<metadata-item>, ...]
        metadata-item = <metadata-key: 1 byte>
                        <metadata-value len: 2 byte unsigned>
                        <metadata-value>

        metadata-key could be METAKEYFLAG or METAKEYSIZE or other single byte
        value in the future.

    .dataidx
        The index file consists of two parts, the fanout and the index.

        The index is a list of index entries, sorted by node (one per
        revision in the pack). Each entry has:

        - node (The 20 byte node of the entry; i.e. the commit hash, file
                node hash, etc)
        - deltabase index offset (The location in the index of the deltabase
                                  for this entry. The deltabase is the next
                                  delta in the chain, with the chain
                                  eventually terminating in a full-text,
                                  represented by a deltabase offset of -1.
                                  This lets us compute delta chains from the
                                  index, then do sequential reads from the
                                  pack if the revisions are nearby on disk.)
        - pack entry offset (The location of this entry in the datapack)
        - pack content size (The on-disk length of this entry's pack data)

        The fanout is a quick lookup table to reduce the number of steps for
        bisecting the index. It is a series of 4 byte pointers to positions
        within the index. It has 2^16 entries, which corresponds to hash
        prefixes [0000, 0001,..., FFFE, FFFF]. Example: the pointer in slot
        4F0A points to the index position of the first revision whose node
        starts with 4F0A. This saves log(2^16)=16 bisect steps.

        dataidx = <fanouttable>
                  <index>
        fanouttable = [<index offset: 4 byte unsigned int>,...] (2^16 entries)
        index = [<index entry>,...]
        indexentry = <node: 20 byte>
                     <deltabase location: 4 byte signed int>
                     <pack entry offset: 8 byte unsigned int>
                     <pack entry size: 8 byte unsigned int>

    [1]: new in version 1.
    """
    INDEXSUFFIX = INDEXSUFFIX
    PACKSUFFIX = PACKSUFFIX

    # v[01] index format: <node><delta offset><pack data offset><pack data size>
    INDEXFORMAT = datapack.INDEXFORMAT
    INDEXENTRYLENGTH = datapack.INDEXENTRYLENGTH

    # v1 has metadata support
    SUPPORTED_VERSIONS = [2]

    def _compress(self, data):
        return zlib.compress(data)

    def add(self, name, node, deltabasenode, delta, metadata=None):
        # metadata is a dict, ex. {METAKEYFLAG: flag}
        if len(name) > 2**16:
            raise RuntimeError(_("name too long %s") % name)
        if len(node) != 20:
            raise RuntimeError(_("node should be 20 bytes %s") % node)

        if node in self.entries:
            # The revision has already been added
            return

        # TODO: allow configurable compression
        delta = self._compress(delta)

        rawdata = ''.join((
            struct.pack('!H', len(name)),  # unsigned 2 byte int
            name,
            node,
            deltabasenode,
            struct.pack('!Q', len(delta)),  # unsigned 8 byte int
            delta,
        ))

        # v1 support metadata
        rawmeta = shallowutil.buildpackmeta(metadata)
        rawdata += struct.pack('!I', len(rawmeta))  # unsigned 4 byte
        rawdata += rawmeta

        offset = self.packfp.tell()

        size = len(rawdata)

        self.entries[node] = (deltabasenode, offset, size)

        self.writeraw(rawdata)

    def createindex(self, nodelocations, indexoffset):
        entries = sorted((n, db, o, s) for n, (db, o, s)
                         in self.entries.iteritems())

        rawindex = ''
        fmt = self.INDEXFORMAT
        for node, deltabase, offset, size in entries:
            if deltabase == nullid:
                deltabaselocation = FULLTEXTINDEXMARK
            else:
                # Instead of storing the deltabase node in the index, let's
                # store a pointer directly to the index entry for the
                # deltabase.
                deltabaselocation = nodelocations.get(deltabase,
                                                      NOBASEINDEXMARK)

            entry = struct.pack(fmt, node, deltabaselocation, offset, size)
            rawindex += entry

        return rawindex
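
To make the on-disk `<revision>` layout described in the `mutabledatapack` docstring concrete, here is a minimal standalone sketch (reviewer-added, not part of the change under review) that builds one synthetic record and parses it back. The helper name `parse_revision` and the sample values are illustrative assumptions only; the real reader in this module is `datapack._readentry` above.

import struct
import zlib

def parse_revision(buf, offset=0):
    # <filename len: 2 byte unsigned int> + <filename>
    (namelen,) = struct.unpack_from('!H', buf, offset)
    offset += 2
    filename = buf[offset:offset + namelen]
    offset += namelen
    # <node: 20 byte> + <deltabasenode: 20 byte>
    node = buf[offset:offset + 20]
    deltabasenode = buf[offset + 20:offset + 40]
    offset += 40
    # <delta len: 8 byte unsigned int> + <delta> (zlib-compressed on disk)
    (deltalen,) = struct.unpack_from('!Q', buf, offset)
    offset += 8
    delta = zlib.decompress(buf[offset:offset + deltalen])
    offset += deltalen
    # <metadata-list len: 4 byte unsigned int> + <metadata-list>
    (metalen,) = struct.unpack_from('!I', buf, offset)
    offset += 4
    rawmeta = buf[offset:offset + metalen]
    offset += metalen
    return filename, node, deltabasenode, delta, rawmeta, offset

# Round-trip a synthetic record (all values made up for illustration).
name = b'foo'
node = b'\x11' * 20
base = b'\x00' * 20                      # nullid deltabase marks a fulltext
payload = zlib.compress(b'file contents')
record = (struct.pack('!H', len(name)) + name + node + base
          + struct.pack('!Q', len(payload)) + payload
          + struct.pack('!I', 0))        # empty metadata list
print(parse_revision(record))

Note that `parse_revision` returns the offset just past the record, which is exactly how `iterentries` above walks the pack: records are densely packed after the 1-byte version header, so each parse yields the start of the next record.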
@@ -1,380 +1,380 @@
#!/usr/bin/env python
from __future__ import absolute_import, print_function

import hashlib
import os
import random
import shutil
import stat
import struct
import sys
import tempfile
import time
import unittest

import silenttestrunner

# Load the local remotefilelog, not the system one
sys.path[0:0] = [os.path.join(os.path.dirname(__file__), '..')]
from mercurial.node import nullid
from mercurial import (
    pycompat,
    ui as uimod,
)
from hgext.remotefilelog import (
    basepack,
    constants,
    datapack,
)

class datapacktestsbase(object):
    def __init__(self, datapackreader, paramsavailable):
        self.datapackreader = datapackreader
        self.paramsavailable = paramsavailable

    def setUp(self):
        self.tempdirs = []

    def tearDown(self):
        for d in self.tempdirs:
            shutil.rmtree(d)

    def makeTempDir(self):
        tempdir = pycompat.bytestr(tempfile.mkdtemp())
        self.tempdirs.append(tempdir)
        return tempdir

    def getHash(self, content):
        return hashlib.sha1(content).digest()

    def getFakeHash(self):
        return b''.join(pycompat.bytechr(random.randint(0, 255))
                        for _ in range(20))

    def createPack(self, revisions=None, packdir=None):
        if revisions is None:
            revisions = [(b"filename", self.getFakeHash(), nullid, b"content")]

        if packdir is None:
            packdir = self.makeTempDir()

        packer = datapack.mutabledatapack(uimod.ui(), packdir, version=2)

        for args in revisions:
            filename, node, base, content = args[0:4]
            # meta is optional
            meta = None
            if len(args) > 4:
                meta = args[4]
            packer.add(filename, node, base, content, metadata=meta)

        path = packer.close()
        return self.datapackreader(path)

    def _testAddSingle(self, content):
        """Test putting a simple blob into a pack and reading it out.
        """
        filename = b"foo"
        node = self.getHash(content)

        revisions = [(filename, node, nullid, content)]
        pack = self.createPack(revisions)
        if self.paramsavailable:
            self.assertEqual(pack.params.fanoutprefix,
                             basepack.SMALLFANOUTPREFIX)

        chain = pack.getdeltachain(filename, node)
        self.assertEqual(content, chain[0][4])

    def testAddSingle(self):
        self._testAddSingle(b'abcdef')

    def testAddSingleEmpty(self):
        self._testAddSingle(b'')

    def testAddMultiple(self):
        """Test putting multiple unrelated blobs into a pack and reading them
        out.
        """
        revisions = []
        for i in range(10):
            filename = b"foo%d" % i
            content = b"abcdef%d" % i
            node = self.getHash(content)
            revisions.append((filename, node, self.getFakeHash(), content))

        pack = self.createPack(revisions)

        for filename, node, base, content in revisions:
            entry = pack.getdelta(filename, node)
            self.assertEqual((content, filename, base, {}), entry)

            chain = pack.getdeltachain(filename, node)
            self.assertEqual(content, chain[0][4])

    def testAddDeltas(self):
        """Test putting multiple delta blobs into a pack and read the chain.
        """
        revisions = []
        filename = b"foo"
        lastnode = nullid
        for i in range(10):
            content = b"abcdef%d" % i
            node = self.getHash(content)
            revisions.append((filename, node, lastnode, content))
            lastnode = node

        pack = self.createPack(revisions)

        entry = pack.getdelta(filename, revisions[0][1])
        realvalue = (revisions[0][3], filename, revisions[0][2], {})
        self.assertEqual(entry, realvalue)

        # Test that the chain for the final entry has all the others
        chain = pack.getdeltachain(filename, node)
        for i in range(10):
            content = b"abcdef%d" % i
            self.assertEqual(content, chain[-i - 1][4])

    def testPackMany(self):
        """Pack many related and unrelated objects.
        """
        # Build a random pack file
        revisions = []
        blobs = {}
        random.seed(0)
        for i in range(100):
            filename = b"filename-%d" % i
            filerevs = []
            for j in range(random.randint(1, 100)):
                content = b"content-%d" % j
                node = self.getHash(content)
                lastnode = nullid
                if len(filerevs) > 0:
                    lastnode = filerevs[random.randint(0, len(filerevs) - 1)]
                filerevs.append(node)
                blobs[(filename, node, lastnode)] = content
                revisions.append((filename, node, lastnode, content))

        pack = self.createPack(revisions)

        # Verify the pack contents
        for (filename, node, lastnode), content in sorted(blobs.items()):
            chain = pack.getdeltachain(filename, node)
            for entry in chain:
                expectedcontent = blobs[(entry[0], entry[1], entry[3])]
                self.assertEqual(entry[4], expectedcontent)

    def testPackMetadata(self):
        revisions = []
        for i in range(100):
            filename = b'%d.txt' % i
            content = b'put-something-here \n' * i
            node = self.getHash(content)
            meta = {constants.METAKEYFLAG: i ** 4,
                    constants.METAKEYSIZE: len(content),
                    b'Z': b'random_string',
                    b'_': b'\0' * i}
            revisions.append((filename, node, nullid, content, meta))
        pack = self.createPack(revisions)
        for name, node, x, content, origmeta in revisions:
            parsedmeta = pack.getmeta(name, node)
            # flag == 0 should be optimized out
            if origmeta[constants.METAKEYFLAG] == 0:
                del origmeta[constants.METAKEYFLAG]
            self.assertEqual(parsedmeta, origmeta)

    def testGetMissing(self):
        """Test the getmissing() api.
        """
        revisions = []
        filename = b"foo"
        lastnode = nullid
        for i in range(10):
            content = b"abcdef%d" % i
            node = self.getHash(content)
            revisions.append((filename, node, lastnode, content))
            lastnode = node

        pack = self.createPack(revisions)

        missing = pack.getmissing([(b"foo", revisions[0][1])])
        self.assertFalse(missing)

        missing = pack.getmissing([(b"foo", revisions[0][1]),
                                   (b"foo", revisions[1][1])])
        self.assertFalse(missing)

        fakenode = self.getFakeHash()
        missing = pack.getmissing([(b"foo", revisions[0][1]),
                                   (b"foo", fakenode)])
        self.assertEqual(missing, [(b"foo", fakenode)])

    def testAddThrows(self):
        pack = self.createPack()

        try:
            pack.add(b'filename', nullid, b'contents')
            self.assertTrue(False, "datapack.add should throw")
        except RuntimeError:
            pass

    def testBadVersionThrows(self):
        pack = self.createPack()
        path = pack.path + b'.datapack'
        with open(path, 'rb') as f:
            raw = f.read()
        raw = struct.pack('!B', 255) + raw[1:]
        os.chmod(path, os.stat(path).st_mode | stat.S_IWRITE)
        with open(path, 'wb+') as f:
            f.write(raw)

        try:
            pack = self.datapackreader(pack.path)
            self.assertTrue(False, "bad version number should have thrown")
        except RuntimeError:
            pass

    def testMissingDeltabase(self):
        fakenode = self.getFakeHash()
        revisions = [(b"filename", fakenode, self.getFakeHash(), b"content")]
        pack = self.createPack(revisions)
        chain = pack.getdeltachain(b"filename", fakenode)
        self.assertEqual(len(chain), 1)

    def testLargePack(self):
        """Test creating and reading from a large pack with over X entries.
        This causes it to use a 2^16 fanout table instead."""
        revisions = []
        blobs = {}
        total = basepack.SMALLFANOUTCUTOFF + 1
        for i in pycompat.xrange(total):
            filename = b"filename-%d" % i
            content = filename
            node = self.getHash(content)
            blobs[(filename, node)] = content
            revisions.append((filename, node, nullid, content))

        pack = self.createPack(revisions)
        if self.paramsavailable:
            self.assertEqual(pack.params.fanoutprefix,
                             basepack.LARGEFANOUTPREFIX)

        for (filename, node), content in blobs.items():
            actualcontent = pack.getdeltachain(filename, node)[0][4]
            self.assertEqual(actualcontent, content)

    def testPacksCache(self):
        """Test that we remember the most recent packs while fetching the
        delta chain."""

        packdir = self.makeTempDir()
        deltachains = []

        numpacks = 10
        revisionsperpack = 100

        for i in range(numpacks):
            chain = []
            revision = (b'%d' % i, self.getFakeHash(), nullid, b"content")

            for _ in range(revisionsperpack):
                chain.append(revision)
                revision = (
                    b'%d' % i,
                    self.getFakeHash(),
                    revision[1],
                    self.getFakeHash()
                )

            self.createPack(chain, packdir)
            deltachains.append(chain)

        class testdatapackstore(datapack.datapackstore):
            # Ensures that we are not keeping everything in the cache.
            DEFAULTCACHESIZE = numpacks // 2

        store = testdatapackstore(uimod.ui(), packdir)

        random.shuffle(deltachains)
        for randomchain in deltachains:
            revision = random.choice(randomchain)
            chain = store.getdeltachain(revision[0], revision[1])

            mostrecentpack = next(iter(store.packs), None)
            self.assertEqual(
                mostrecentpack.getdeltachain(revision[0], revision[1]),
                chain
            )

            self.assertEqual(randomchain.index(revision) + 1, len(chain))

    # perf test off by default since it's slow
    def _testIndexPerf(self):
        random.seed(0)
        print("Multi-get perf test")
        packsizes = [
            100,
            10000,
            100000,
            500000,
            1000000,
            3000000,
        ]
        lookupsizes = [
            10,
            100,
            1000,
            10000,
            100000,
            1000000,
        ]
        for packsize in packsizes:
            revisions = []
            for i in pycompat.xrange(packsize):
                filename = b"filename-%d" % i
                content = b"content-%d" % i
                node = self.getHash(content)
                revisions.append((filename, node, nullid, content))

            path = self.createPack(revisions).path

            # Perf of large multi-get
            import gc
            gc.disable()
            pack = self.datapackreader(path)
            for lookupsize in lookupsizes:
                if lookupsize > packsize:
                    continue
                random.shuffle(revisions)
                findnodes = [(rev[0], rev[1]) for rev in revisions]

                start = time.time()
                pack.getmissing(findnodes[:lookupsize])
                elapsed = time.time() - start
                print("%s pack %s lookups = %0.04f" %
                      (('%d' % packsize).rjust(7),
                       ('%d' % lookupsize).rjust(7),
                       elapsed))

            print("")
            gc.enable()

        # The perf test is meant to produce output, so we always fail the test
        # so the user sees the output.
        raise RuntimeError("perf test always fails")

class datapacktests(datapacktestsbase, unittest.TestCase):
    def __init__(self, *args, **kwargs):
        datapacktestsbase.__init__(self, datapack.datapack, True)
        unittest.TestCase.__init__(self, *args, **kwargs)

# TODO:
# datapack store:
# - getmissing
# - GC two packs into one

if __name__ == '__main__':
    if pycompat.iswindows:
        sys.exit(80) # Skip on Windows
    silenttestrunner.main(__name__)
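
For readers following the index lookup these tests exercise, here is a short standalone sketch (reviewer-added) of the arithmetic in `datapack._find`: the fanout key taken from the node prefix, and the bisection midpoint snapped back to an entry boundary. The `INDEXSTART` value below is a hypothetical stand-in; in a real pack `params.indexstart` is derived from the index header and fanout table size.

import struct

INDEXSTART = 4 + 4 * 2**16  # hypothetical: header plus a 2^16-slot fanout
ENTRYLEN = 40               # 20 + 4 + 8 + 8, matching INDEXFORMAT '!20siQQ'

def fanout_key(node):
    # With the large fanout, the first two bytes of the node select one of
    # the 2^16 slots, e.g. a node starting 4F0A... maps to slot 0x4F0A.
    return struct.unpack('!H', node[:2])[0]

def align_mid(start, end):
    # Midpoint of the bisection window, snapped back to an entry boundary
    # so node comparisons always begin at the start of an index entry.
    mid = start + (end - start) // 2
    return mid - ((mid - INDEXSTART) % ENTRYLEN)

node = b'\x4f\x0a' + b'\x00' * 18
print(fanout_key(node))  # 20234 == 0x4F0A
# Midpoint of an 11-entry window (440 bytes) is 220, which aligns down to
# byte 200, i.e. the start of entry 5.
print(align_mid(INDEXSTART, INDEXSTART + 11 * ENTRYLEN) - INDEXSTART)

The alignment step matters because index entries are fixed-width but the raw midpoint of a byte range usually falls mid-entry; without the snap, the 20-byte node comparison would read garbage straddling two entries.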