##// END OF EJS Templates
sparse-read: move from a recursive-based approach to a heap-based one...
Boris Feld -
r34881:9e18ab7f default
parent child Browse files
Show More
@@ -17,6 +17,7 b' import binascii'
17 import collections
17 import collections
18 import errno
18 import errno
19 import hashlib
19 import hashlib
20 import heapq
20 import os
21 import os
21 import struct
22 import struct
22 import zlib
23 import zlib
@@ -170,49 +171,59 b' def _slicechunk(revlog, revs):'
170 start = revlog.start
171 start = revlog.start
171 length = revlog.length
172 length = revlog.length
172
173
173 chunkqueue = collections.deque()
174 if len(revs) <= 1:
174 chunkqueue.append((revs, 0))
175 yield revs
175
176 return
176 while chunkqueue:
177 revs, depth = chunkqueue.popleft()
178
177
179 startbyte = start(revs[0])
178 startbyte = start(revs[0])
180 endbyte = start(revs[-1]) + length(revs[-1])
179 endbyte = start(revs[-1]) + length(revs[-1])
181 deltachainspan = endbyte - startbyte
180 readdata = deltachainspan = endbyte - startbyte
182
181
183 if deltachainspan <= revlog._srminblocksize or len(revs) <= 1:
182 chainpayload = sum(length(r) for r in revs)
184 yield revs
185 continue
186
183
187 # Find where is the largest hole (this is where we would split) and
184 if deltachainspan:
188 # sum up the lengths of useful data to compute the density of the span
185 density = chainpayload / float(deltachainspan)
189 textlen = 0
186 else:
187 density = 1.0
188
189 # Store the gaps in a heap to have them sorted by decreasing size
190 gapsheap = []
191 heapq.heapify(gapsheap)
190 prevend = None
192 prevend = None
191 largesthole = 0
192 idxlargesthole = -1
193 for i, rev in enumerate(revs):
193 for i, rev in enumerate(revs):
194 revstart = start(rev)
194 revstart = start(rev)
195 revlen = length(rev)
195 revlen = length(rev)
196
196
197 if prevend is not None:
197 if prevend is not None:
198 hole = revstart - prevend
198 gapsize = revstart - prevend
199 if hole > largesthole:
199 if gapsize:
200 largesthole = hole
200 heapq.heappush(gapsheap, (-gapsize, i))
201 idxlargesthole = i
202
201
203 textlen += revlen
204 prevend = revstart + revlen
202 prevend = revstart + revlen
205
203
206 density = textlen / float(deltachainspan) if deltachainspan > 0 else 1.0
204 # Collect the indices of the largest holes until the density is acceptable
205 indicesheap = []
206 heapq.heapify(indicesheap)
207 while gapsheap and density < revlog._srdensitythreshold:
208 oppgapsize, gapidx = heapq.heappop(gapsheap)
209
210 heapq.heappush(indicesheap, gapidx)
207
211
208 if density > revlog._srdensitythreshold:
212 # the gap sizes are stored as negatives to be sorted decreasingly
209 yield revs
213 # by the heap
210 continue
214 readdata -= (-oppgapsize)
215 if readdata > 0:
216 density = chainpayload / float(readdata)
217 else:
218 density = 1.0
211
219
212 # Add the left and right parts so that they will be sliced
220 # Cut the revs at collected indices
213 # recursively too
221 previdx = 0
214 chunkqueue.append((revs[:idxlargesthole], depth + 1))
222 while indicesheap:
215 chunkqueue.append((revs[idxlargesthole:], depth + 1))
223 idx = heapq.heappop(indicesheap)
224 yield revs[previdx:idx]
225 previdx = idx
226 yield revs[previdx:]
216
227
217 # index v0:
228 # index v0:
218 # 4 bytes: offset
229 # 4 bytes: offset
General Comments 0
You need to be logged in to leave comments. Login now