##// END OF EJS Templates
sparse-read: move from a recursive-based approach to a heap-based one...
Boris Feld -
r34881:9e18ab7f default
parent child Browse files
Show More
@@ -17,6 +17,7 b' import binascii'
17 import collections
17 import collections
18 import errno
18 import errno
19 import hashlib
19 import hashlib
20 import heapq
20 import os
21 import os
21 import struct
22 import struct
22 import zlib
23 import zlib
@@ -170,49 +171,59 b' def _slicechunk(revlog, revs):'
170 start = revlog.start
171 start = revlog.start
171 length = revlog.length
172 length = revlog.length
172
173
173 chunkqueue = collections.deque()
174 if len(revs) <= 1:
174 chunkqueue.append((revs, 0))
175 yield revs
176 return
175
177
176 while chunkqueue:
178 startbyte = start(revs[0])
177 revs, depth = chunkqueue.popleft()
179 endbyte = start(revs[-1]) + length(revs[-1])
180 readdata = deltachainspan = endbyte - startbyte
181
182 chainpayload = sum(length(r) for r in revs)
178
183
179 startbyte = start(revs[0])
184 if deltachainspan:
180 endbyte = start(revs[-1]) + length(revs[-1])
185 density = chainpayload / float(deltachainspan)
181 deltachainspan = endbyte - startbyte
186 else:
187 density = 1.0
182
188
183 if deltachainspan <= revlog._srminblocksize or len(revs) <= 1:
189 # Store the gaps in a heap to have them sorted by decreasing size
184 yield revs
190 gapsheap = []
185 continue
191 heapq.heapify(gapsheap)
192 prevend = None
193 for i, rev in enumerate(revs):
194 revstart = start(rev)
195 revlen = length(rev)
186
196
187 # Find where is the largest hole (this is where we would split) and
197 if prevend is not None:
188 # sum up the lengths of useful data to compute the density of the span
198 gapsize = revstart - prevend
189 textlen = 0
199 if gapsize:
190 prevend = None
200 heapq.heappush(gapsheap, (-gapsize, i))
191 largesthole = 0
201
192 idxlargesthole = -1
202 prevend = revstart + revlen
193 for i, rev in enumerate(revs):
203
194 revstart = start(rev)
204 # Collect the indices of the largest holes until the density is acceptable
195 revlen = length(rev)
205 indicesheap = []
206 heapq.heapify(indicesheap)
207 while gapsheap and density < revlog._srdensitythreshold:
208 oppgapsize, gapidx = heapq.heappop(gapsheap)
209
210 heapq.heappush(indicesheap, gapidx)
196
211
197 if prevend is not None:
212 # the gap sizes are stored as negatives to be sorted decreasingly
198 hole = revstart - prevend
213 # by the heap
199 if hole > largesthole:
214 readdata -= (-oppgapsize)
200 largesthole = hole
215 if readdata > 0:
201 idxlargesthole = i
216 density = chainpayload / float(readdata)
202
217 else:
203 textlen += revlen
218 density = 1.0
204 prevend = revstart + revlen
205
219
206 density = textlen / float(deltachainspan) if deltachainspan > 0 else 1.0
220 # Cut the revs at collected indices
207
221 previdx = 0
208 if density > revlog._srdensitythreshold:
222 while indicesheap:
209 yield revs
223 idx = heapq.heappop(indicesheap)
210 continue
224 yield revs[previdx:idx]
211
225 previdx = idx
212 # Add the left and right parts so that they will be sliced
226 yield revs[previdx:]
213 # recursively too
214 chunkqueue.append((revs[:idxlargesthole], depth + 1))
215 chunkqueue.append((revs[idxlargesthole:], depth + 1))
216
227
217 # index v0:
228 # index v0:
218 # 4 bytes: offset
229 # 4 bytes: offset
General Comments 0
You need to be logged in to leave comments. Login now