##// END OF EJS Templates
sparse-read: move from a recursive-based approach to a heap-based one...
Boris Feld -
r34881:9e18ab7f default
parent child Browse files
Show More
@@ -17,6 +17,7 b' import binascii'
17 17 import collections
18 18 import errno
19 19 import hashlib
20 import heapq
20 21 import os
21 22 import struct
22 23 import zlib
@@ -170,49 +171,59 b' def _slicechunk(revlog, revs):'
170 171 start = revlog.start
171 172 length = revlog.length
172 173
173 chunkqueue = collections.deque()
174 chunkqueue.append((revs, 0))
174 if len(revs) <= 1:
175 yield revs
176 return
175 177
176 while chunkqueue:
177 revs, depth = chunkqueue.popleft()
178 startbyte = start(revs[0])
179 endbyte = start(revs[-1]) + length(revs[-1])
180 readdata = deltachainspan = endbyte - startbyte
181
182 chainpayload = sum(length(r) for r in revs)
178 183
179 startbyte = start(revs[0])
180 endbyte = start(revs[-1]) + length(revs[-1])
181 deltachainspan = endbyte - startbyte
184 if deltachainspan:
185 density = chainpayload / float(deltachainspan)
186 else:
187 density = 1.0
182 188
183 if deltachainspan <= revlog._srminblocksize or len(revs) <= 1:
184 yield revs
185 continue
189 # Store the gaps in a heap to have them sorted by decreasing size
190 gapsheap = []
191 heapq.heapify(gapsheap)
192 prevend = None
193 for i, rev in enumerate(revs):
194 revstart = start(rev)
195 revlen = length(rev)
186 196
187 # Find where is the largest hole (this is where we would split) and
188 # sum up the lengths of useful data to compute the density of the span
189 textlen = 0
190 prevend = None
191 largesthole = 0
192 idxlargesthole = -1
193 for i, rev in enumerate(revs):
194 revstart = start(rev)
195 revlen = length(rev)
197 if prevend is not None:
198 gapsize = revstart - prevend
199 if gapsize:
200 heapq.heappush(gapsheap, (-gapsize, i))
201
202 prevend = revstart + revlen
203
204 # Collect the indices of the largest holes until the density is acceptable
205 indicesheap = []
206 heapq.heapify(indicesheap)
207 while gapsheap and density < revlog._srdensitythreshold:
208 oppgapsize, gapidx = heapq.heappop(gapsheap)
209
210 heapq.heappush(indicesheap, gapidx)
196 211
197 if prevend is not None:
198 hole = revstart - prevend
199 if hole > largesthole:
200 largesthole = hole
201 idxlargesthole = i
202
203 textlen += revlen
204 prevend = revstart + revlen
212 # the gap sizes are stored as negatives to be sorted decreasingly
213 # by the heap
214 readdata -= (-oppgapsize)
215 if readdata > 0:
216 density = chainpayload / float(readdata)
217 else:
218 density = 1.0
205 219
206 density = textlen / float(deltachainspan) if deltachainspan > 0 else 1.0
207
208 if density > revlog._srdensitythreshold:
209 yield revs
210 continue
211
212 # Add the left and right parts so that they will be sliced
213 # recursively too
214 chunkqueue.append((revs[:idxlargesthole], depth + 1))
215 chunkqueue.append((revs[idxlargesthole:], depth + 1))
220 # Cut the revs at collected indices
221 previdx = 0
222 while indicesheap:
223 idx = heapq.heappop(indicesheap)
224 yield revs[previdx:idx]
225 previdx = idx
226 yield revs[previdx:]
216 227
217 228 # index v0:
218 229 # 4 bytes: offset
General Comments 0
You need to be logged in to leave comments. Login now