delta: exclude base candidate much smaller than the target...
Boris Feld
r41014:42f59d3f default
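This changeset adds a cheap size filter to delta-base candidate selection in revlogdeltas.py: in sparse-revlog mode, a base whose raw size is less than 1/50th of the target text (the new LIMIT_BASE2TEXT = 50 constant) is very unlikely to produce a valid delta, so it is skipped before any delta is computed. A minimal standalone sketch of the rule, using only the constant and comparison visible in the diff below (the function name and arguments are illustrative, not part of the change):

    # Sketch of the exclusion added to _candidategroups(); `basesize`
    # stands for revlog.rawsize(rev), `textlen` for the full-text length
    # of the revision being stored.
    LIMIT_BASE2TEXT = 50

    def base_is_plausible(basesize, textlen, sparse=True):
        # Only sparse revlogs apply the filter, as in the diff.
        if not sparse:
            return True
        return basesize >= textlen // LIMIT_BASE2TEXT

    assert not base_is_plausible(100, 10000)  # 100 < 10000 // 50: excluded
    assert base_is_plausible(500, 10000)      # 500 >= 200: kept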
@@ -1,981 +1,989 b''
1 1 # revlogdeltas.py - Logic around delta computation for revlog
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 # Copyright 2018 Octobus <contact@octobus.net>
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8 """Helper class to compute deltas stored inside revlogs"""
9 9
10 10 from __future__ import absolute_import
11 11
12 12 import collections
13 13 import struct
14 14
15 15 # import stuff from node for others to import from revlog
16 16 from ..node import (
17 17 nullrev,
18 18 )
19 19 from ..i18n import _
20 20
21 21 from .constants import (
22 22 REVIDX_ISCENSORED,
23 23 REVIDX_RAWTEXT_CHANGING_FLAGS,
24 24 )
25 25
26 26 from ..thirdparty import (
27 27 attr,
28 28 )
29 29
30 30 from .. import (
31 31 error,
32 32 mdiff,
33 33 )
34 34
35 35 # maximum <delta-chain-data>/<revision-text-length> ratio
36 36 LIMIT_DELTA2TEXT = 2
37 37
38 38 class _testrevlog(object):
39 39 """minimalist fake revlog to use in doctests"""
40 40
41 41 def __init__(self, data, density=0.5, mingap=0, snapshot=()):
42 42 """data is an list of revision payload boundaries"""
43 43 self._data = data
44 44 self._srdensitythreshold = density
45 45 self._srmingapsize = mingap
46 46 self._snapshot = set(snapshot)
47 47 self.index = None
48 48
49 49 def start(self, rev):
50 50 if rev == 0:
51 51 return 0
52 52 return self._data[rev - 1]
53 53
54 54 def end(self, rev):
55 55 return self._data[rev]
56 56
57 57 def length(self, rev):
58 58 return self.end(rev) - self.start(rev)
59 59
60 60 def __len__(self):
61 61 return len(self._data)
62 62
63 63 def issnapshot(self, rev):
64 64 return rev in self._snapshot
65 65
66 66 def slicechunk(revlog, revs, targetsize=None):
67 67 """slice revs to reduce the amount of unrelated data to be read from disk.
68 68
69 69 ``revs`` is sliced into groups that should be read in one go.
70 70 Assume that revs are sorted.
71 71
72 72 The initial chunk is sliced until the overall density (payload/chunks-span
73 73 ratio) is above `revlog._srdensitythreshold`. No gap smaller than
74 74 `revlog._srmingapsize` is skipped.
75 75
76 76 If `targetsize` is set, no chunk larger than `targetsize` will be yielded.
77 77 For consistency with other slicing choices, this limit won't go lower than
78 78 `revlog._srmingapsize`.
79 79
80 80 If individual revision chunks are larger than this limit, they will still
81 81 be yielded individually.
82 82
83 83 >>> data = [
84 84 ... 5, #00 (5)
85 85 ... 10, #01 (5)
86 86 ... 12, #02 (2)
87 87 ... 12, #03 (empty)
88 88 ... 27, #04 (15)
89 89 ... 31, #05 (4)
90 90 ... 31, #06 (empty)
91 91 ... 42, #07 (11)
92 92 ... 47, #08 (5)
93 93 ... 47, #09 (empty)
94 94 ... 48, #10 (1)
95 95 ... 51, #11 (3)
96 96 ... 74, #12 (23)
97 97 ... 85, #13 (11)
98 98 ... 86, #14 (1)
99 99 ... 91, #15 (5)
100 100 ... ]
101 101 >>> revlog = _testrevlog(data, snapshot=range(16))
102 102
103 103 >>> list(slicechunk(revlog, list(range(16))))
104 104 [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
105 105 >>> list(slicechunk(revlog, [0, 15]))
106 106 [[0], [15]]
107 107 >>> list(slicechunk(revlog, [0, 11, 15]))
108 108 [[0], [11], [15]]
109 109 >>> list(slicechunk(revlog, [0, 11, 13, 15]))
110 110 [[0], [11, 13, 15]]
111 111 >>> list(slicechunk(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
112 112 [[1, 2], [5, 8, 10, 11], [14]]
113 113
114 114 Slicing with a maximum chunk size
115 115 >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=15))
116 116 [[0], [11], [13], [15]]
117 117 >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=20))
118 118 [[0], [11], [13, 15]]
119 119 """
120 120 if targetsize is not None:
121 121 targetsize = max(targetsize, revlog._srmingapsize)
122 122 # targetsize should not be specified when evaluating delta candidates:
123 123 # * targetsize is used to ensure we stay within specification when reading,
124 124 densityslicing = getattr(revlog.index, 'slicechunktodensity', None)
125 125 if densityslicing is None:
126 126 densityslicing = lambda x, y, z: _slicechunktodensity(revlog, x, y, z)
127 127 for chunk in densityslicing(revs,
128 128 revlog._srdensitythreshold,
129 129 revlog._srmingapsize):
130 130 for subchunk in _slicechunktosize(revlog, chunk, targetsize):
131 131 yield subchunk
132 132
133 133 def _slicechunktosize(revlog, revs, targetsize=None):
134 134 """slice revs to match the target size
135 135
136 136 This is intended to be used on chunks that density slicing selected, but
137 137 that are still too large compared to the read guarantee of the revlog. This
138 138 might happen when the "minimal gap size" interrupted the slicing, or when
139 139 chains are built in a way that creates large blocks next to each other.
140 140
141 141 >>> data = [
142 142 ... 3, #0 (3)
143 143 ... 5, #1 (2)
144 144 ... 6, #2 (1)
145 145 ... 8, #3 (2)
146 146 ... 8, #4 (empty)
147 147 ... 11, #5 (3)
148 148 ... 12, #6 (1)
149 149 ... 13, #7 (1)
150 150 ... 14, #8 (1)
151 151 ... ]
152 152
153 153 == All snapshots cases ==
154 154 >>> revlog = _testrevlog(data, snapshot=range(9))
155 155
156 156 Cases where chunk is already small enough
157 157 >>> list(_slicechunktosize(revlog, [0], 3))
158 158 [[0]]
159 159 >>> list(_slicechunktosize(revlog, [6, 7], 3))
160 160 [[6, 7]]
161 161 >>> list(_slicechunktosize(revlog, [0], None))
162 162 [[0]]
163 163 >>> list(_slicechunktosize(revlog, [6, 7], None))
164 164 [[6, 7]]
165 165
166 166 Cases where we need actual slicing
167 167 >>> list(_slicechunktosize(revlog, [0, 1], 3))
168 168 [[0], [1]]
169 169 >>> list(_slicechunktosize(revlog, [1, 3], 3))
170 170 [[1], [3]]
171 171 >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
172 172 [[1, 2], [3]]
173 173 >>> list(_slicechunktosize(revlog, [3, 5], 3))
174 174 [[3], [5]]
175 175 >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
176 176 [[3], [5]]
177 177 >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
178 178 [[5], [6, 7, 8]]
179 179 >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
180 180 [[0], [1, 2], [3], [5], [6, 7, 8]]
181 181
182 182 Case with too large individual chunk (must return valid chunk)
183 183 >>> list(_slicechunktosize(revlog, [0, 1], 2))
184 184 [[0], [1]]
185 185 >>> list(_slicechunktosize(revlog, [1, 3], 1))
186 186 [[1], [3]]
187 187 >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
188 188 [[3], [5]]
189 189
190 190 == No Snapshot cases ==
191 191 >>> revlog = _testrevlog(data)
192 192
193 193 Cases where chunk is already small enough
194 194 >>> list(_slicechunktosize(revlog, [0], 3))
195 195 [[0]]
196 196 >>> list(_slicechunktosize(revlog, [6, 7], 3))
197 197 [[6, 7]]
198 198 >>> list(_slicechunktosize(revlog, [0], None))
199 199 [[0]]
200 200 >>> list(_slicechunktosize(revlog, [6, 7], None))
201 201 [[6, 7]]
202 202
203 203 Cases where we need actual slicing
204 204 >>> list(_slicechunktosize(revlog, [0, 1], 3))
205 205 [[0], [1]]
206 206 >>> list(_slicechunktosize(revlog, [1, 3], 3))
207 207 [[1], [3]]
208 208 >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
209 209 [[1], [2, 3]]
210 210 >>> list(_slicechunktosize(revlog, [3, 5], 3))
211 211 [[3], [5]]
212 212 >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
213 213 [[3], [4, 5]]
214 214 >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
215 215 [[5], [6, 7, 8]]
216 216 >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
217 217 [[0], [1, 2], [3], [5], [6, 7, 8]]
218 218
219 219 Case with too large individual chunk (must return valid chunk)
220 220 >>> list(_slicechunktosize(revlog, [0, 1], 2))
221 221 [[0], [1]]
222 222 >>> list(_slicechunktosize(revlog, [1, 3], 1))
223 223 [[1], [3]]
224 224 >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
225 225 [[3], [5]]
226 226
227 227 == mixed case ==
228 228 >>> revlog = _testrevlog(data, snapshot=[0, 1, 2])
229 229 >>> list(_slicechunktosize(revlog, list(range(9)), 5))
230 230 [[0, 1], [2], [3, 4, 5], [6, 7, 8]]
231 231 """
232 232 assert targetsize is None or 0 <= targetsize
233 233 startdata = revlog.start(revs[0])
234 234 enddata = revlog.end(revs[-1])
235 235 fullspan = enddata - startdata
236 236 if targetsize is None or fullspan <= targetsize:
237 237 yield revs
238 238 return
239 239
240 240 startrevidx = 0
241 241 endrevidx = 1
242 242 iterrevs = enumerate(revs)
243 243 next(iterrevs) # skip first rev.
244 244 # first step: get snapshots out of the way
245 245 for idx, r in iterrevs:
246 246 span = revlog.end(r) - startdata
247 247 snapshot = revlog.issnapshot(r)
248 248 if span <= targetsize and snapshot:
249 249 endrevidx = idx + 1
250 250 else:
251 251 chunk = _trimchunk(revlog, revs, startrevidx, endrevidx)
252 252 if chunk:
253 253 yield chunk
254 254 startrevidx = idx
255 255 startdata = revlog.start(r)
256 256 endrevidx = idx + 1
257 257 if not snapshot:
258 258 break
259 259
260 260 # for the others, we use binary slicing to quickly converge toward valid
261 261 # chunks (otherwise, we might end up looking for start/end of many
262 262 # revisions). This logic is not looking for the perfect slicing point; it
263 263 # focuses on quickly converging toward valid chunks.
264 264 nbitem = len(revs)
265 265 while (enddata - startdata) > targetsize:
266 266 endrevidx = nbitem
267 267 if nbitem - startrevidx <= 1:
268 268 break # protect against individual chunk larger than limit
269 269 localenddata = revlog.end(revs[endrevidx - 1])
270 270 span = localenddata - startdata
271 271 while span > targetsize:
272 272 if endrevidx - startrevidx <= 1:
273 273 break # protect against individual chunk larger than limit
274 274 endrevidx -= (endrevidx - startrevidx) // 2
275 275 localenddata = revlog.end(revs[endrevidx - 1])
276 276 span = localenddata - startdata
277 277 chunk = _trimchunk(revlog, revs, startrevidx, endrevidx)
278 278 if chunk:
279 279 yield chunk
280 280 startrevidx = endrevidx
281 281 startdata = revlog.start(revs[startrevidx])
282 282
283 283 chunk = _trimchunk(revlog, revs, startrevidx)
284 284 if chunk:
285 285 yield chunk
286 286
287 287 def _slicechunktodensity(revlog, revs, targetdensity=0.5,
288 288 mingapsize=0):
289 289 """slice revs to reduce the amount of unrelated data to be read from disk.
290 290
291 291 ``revs`` is sliced into groups that should be read in one go.
292 292 Assume that revs are sorted.
293 293
294 294 The initial chunk is sliced until the overall density (payload/chunks-span
295 295 ratio) is above `targetdensity`. No gap smaller than `mingapsize` is
296 296 skipped.
297 297
298 298 >>> revlog = _testrevlog([
299 299 ... 5, #00 (5)
300 300 ... 10, #01 (5)
301 301 ... 12, #02 (2)
302 302 ... 12, #03 (empty)
303 303 ... 27, #04 (15)
304 304 ... 31, #05 (4)
305 305 ... 31, #06 (empty)
306 306 ... 42, #07 (11)
307 307 ... 47, #08 (5)
308 308 ... 47, #09 (empty)
309 309 ... 48, #10 (1)
310 310 ... 51, #11 (3)
311 311 ... 74, #12 (23)
312 312 ... 85, #13 (11)
313 313 ... 86, #14 (1)
314 314 ... 91, #15 (5)
315 315 ... ])
316 316
317 317 >>> list(_slicechunktodensity(revlog, list(range(16))))
318 318 [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
319 319 >>> list(_slicechunktodensity(revlog, [0, 15]))
320 320 [[0], [15]]
321 321 >>> list(_slicechunktodensity(revlog, [0, 11, 15]))
322 322 [[0], [11], [15]]
323 323 >>> list(_slicechunktodensity(revlog, [0, 11, 13, 15]))
324 324 [[0], [11, 13, 15]]
325 325 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
326 326 [[1, 2], [5, 8, 10, 11], [14]]
327 327 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
328 328 ... mingapsize=20))
329 329 [[1, 2, 3, 5, 8, 10, 11], [14]]
330 330 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
331 331 ... targetdensity=0.95))
332 332 [[1, 2], [5], [8, 10, 11], [14]]
333 333 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
334 334 ... targetdensity=0.95, mingapsize=12))
335 335 [[1, 2], [5, 8, 10, 11], [14]]
336 336 """
337 337 start = revlog.start
338 338 length = revlog.length
339 339
340 340 if len(revs) <= 1:
341 341 yield revs
342 342 return
343 343
344 344 deltachainspan = segmentspan(revlog, revs)
345 345
346 346 if deltachainspan < mingapsize:
347 347 yield revs
348 348 return
349 349
350 350 readdata = deltachainspan
351 351 chainpayload = sum(length(r) for r in revs)
352 352
353 353 if deltachainspan:
354 354 density = chainpayload / float(deltachainspan)
355 355 else:
356 356 density = 1.0
357 357
358 358 if density >= targetdensity:
359 359 yield revs
360 360 return
361 361
362 362 # Collect the gaps; they are sorted below so the largest pops first
363 363 gaps = []
364 364 prevend = None
365 365 for i, rev in enumerate(revs):
366 366 revstart = start(rev)
367 367 revlen = length(rev)
368 368
369 369 # Skip empty revisions to form larger holes
370 370 if revlen == 0:
371 371 continue
372 372
373 373 if prevend is not None:
374 374 gapsize = revstart - prevend
375 375 # only consider holes that are large enough
376 376 if gapsize > mingapsize:
377 377 gaps.append((gapsize, i))
378 378
379 379 prevend = revstart + revlen
380 380 # sort the gaps so they pop from largest to smallest
381 381 gaps.sort()
382 382
383 383 # Collect the indices of the largest holes until the density is acceptable
384 384 selected = []
385 385 while gaps and density < targetdensity:
386 386 gapsize, gapidx = gaps.pop()
387 387
388 388 selected.append(gapidx)
389 389
390 390 # skipping this gap shrinks the span of data that actually
391 391 # needs to be read
392 392 readdata -= gapsize
393 393 if readdata > 0:
394 394 density = chainpayload / float(readdata)
395 395 else:
396 396 density = 1.0
397 397 selected.sort()
398 398
399 399 # Cut the revs at collected indices
400 400 previdx = 0
401 401 for idx in selected:
402 402
403 403 chunk = _trimchunk(revlog, revs, previdx, idx)
404 404 if chunk:
405 405 yield chunk
406 406
407 407 previdx = idx
408 408
409 409 chunk = _trimchunk(revlog, revs, previdx)
410 410 if chunk:
411 411 yield chunk
412 412
413 413 def _trimchunk(revlog, revs, startidx, endidx=None):
414 414 """returns revs[startidx:endidx] without empty trailing revs
415 415
416 416 Doctest Setup
417 417 >>> revlog = _testrevlog([
418 418 ... 5, #0
419 419 ... 10, #1
420 420 ... 12, #2
421 421 ... 12, #3 (empty)
422 422 ... 17, #4
423 423 ... 21, #5
424 424 ... 21, #6 (empty)
425 425 ... ])
426 426
427 427 Contiguous cases:
428 428 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
429 429 [0, 1, 2, 3, 4, 5]
430 430 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
431 431 [0, 1, 2, 3, 4]
432 432 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
433 433 [0, 1, 2]
434 434 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
435 435 [2]
436 436 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
437 437 [3, 4, 5]
438 438 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
439 439 [3, 4]
440 440
441 441 Discontiguous cases:
442 442 >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
443 443 [1, 3, 5]
444 444 >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
445 445 [1]
446 446 >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
447 447 [3, 5]
448 448 >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
449 449 [3, 5]
450 450 """
451 451 length = revlog.length
452 452
453 453 if endidx is None:
454 454 endidx = len(revs)
455 455
456 456 # If we have a non-empty delta candidate, there is nothing to trim
457 457 if revs[endidx - 1] < len(revlog):
458 458 # Trim empty revs at the end, except the very first revision of a chain
459 459 while (endidx > 1
460 460 and endidx > startidx
461 461 and length(revs[endidx - 1]) == 0):
462 462 endidx -= 1
463 463
464 464 return revs[startidx:endidx]
465 465
466 466 def segmentspan(revlog, revs):
467 467 """Get the byte span of a segment of revisions
468 468
469 469 revs is a sorted array of revision numbers
470 470
471 471 >>> revlog = _testrevlog([
472 472 ... 5, #0
473 473 ... 10, #1
474 474 ... 12, #2
475 475 ... 12, #3 (empty)
476 476 ... 17, #4
477 477 ... ])
478 478
479 479 >>> segmentspan(revlog, [0, 1, 2, 3, 4])
480 480 17
481 481 >>> segmentspan(revlog, [0, 4])
482 482 17
483 483 >>> segmentspan(revlog, [3, 4])
484 484 5
485 485 >>> segmentspan(revlog, [1, 2, 3,])
486 486 7
487 487 >>> segmentspan(revlog, [1, 3])
488 488 7
489 489 """
490 490 if not revs:
491 491 return 0
492 492 end = revlog.end(revs[-1])
493 493 return end - revlog.start(revs[0])
494 494
495 495 def _textfromdelta(fh, revlog, baserev, delta, p1, p2, flags, expectednode):
496 496 """build full text from a (base, delta) pair and other metadata"""
497 497 # special case deltas which replace entire base; no need to decode
498 498 # base revision. this neatly avoids censored bases, which throw when
499 499 # they're decoded.
500 500 hlen = struct.calcsize(">lll")
501 501 if delta[:hlen] == mdiff.replacediffheader(revlog.rawsize(baserev),
502 502 len(delta) - hlen):
503 503 fulltext = delta[hlen:]
504 504 else:
505 505 # deltabase is the rawtext before it was changed by flag processors,
506 506 # which is equivalent to non-raw text
507 507 basetext = revlog.revision(baserev, _df=fh, raw=False)
508 508 fulltext = mdiff.patch(basetext, delta)
509 509
510 510 try:
511 511 res = revlog._processflags(fulltext, flags, 'read', raw=True)
512 512 fulltext, validatehash = res
513 513 if validatehash:
514 514 revlog.checkhash(fulltext, expectednode, p1=p1, p2=p2)
515 515 if flags & REVIDX_ISCENSORED:
516 516 raise error.StorageError(_('node %s is not censored') %
517 517 expectednode)
518 518 except error.CensoredNodeError:
519 519 # must pass the censored index flag to add censored revisions
520 520 if not flags & REVIDX_ISCENSORED:
521 521 raise
522 522 return fulltext
523 523
524 524 @attr.s(slots=True, frozen=True)
525 525 class _deltainfo(object):
526 526 distance = attr.ib()
527 527 deltalen = attr.ib()
528 528 data = attr.ib()
529 529 base = attr.ib()
530 530 chainbase = attr.ib()
531 531 chainlen = attr.ib()
532 532 compresseddeltalen = attr.ib()
533 533 snapshotdepth = attr.ib()
534 534
535 535 def isgooddeltainfo(revlog, deltainfo, revinfo):
536 536 """Returns True if the given delta is good. Good means that it is within
537 537 the disk span, disk size, and chain length bounds that we know to be
538 538 performant."""
539 539 if deltainfo is None:
540 540 return False
541 541
542 542 # - 'deltainfo.distance' is the distance from the base revision --
543 543 # bounding it limits the amount of I/O we need to do.
544 544 # - 'deltainfo.compresseddeltalen' is the sum of the total size of
545 545 # deltas we need to apply -- bounding it limits the amount of CPU
546 546 # we consume.
547 547
548 548 textlen = revinfo.textlen
549 549 defaultmax = textlen * 4
550 550 maxdist = revlog._maxdeltachainspan
551 551 if not maxdist:
552 552 maxdist = deltainfo.distance # ensure the conditional passes
553 553 maxdist = max(maxdist, defaultmax)
554 554
555 555 # Bad delta from read span:
556 556 #
557 557 # If the span of data read is larger than the maximum allowed.
558 558 #
559 559 # In the sparse-revlog case, we rely on the associated "sparse reading"
560 560 # to avoid issues related to the span of data. In theory, it would be
561 561 # possible to build a pathological revlog where the delta pattern would
562 562 # lead to too many reads. However, this does not happen in practice at
563 563 # all. So we skip the span check entirely.
564 564 if not revlog._sparserevlog and maxdist < deltainfo.distance:
565 565 return False
566 566
567 567 # Bad delta from new delta size:
568 568 #
569 569 # If the delta size is larger than the target text, storing the
570 570 # delta will be inefficient.
571 571 if textlen < deltainfo.deltalen:
572 572 return False
573 573
574 574 # Bad delta from cumulated payload size:
575 575 #
576 576 # If the sum of deltas gets larger than K * the target text length.
577 577 if textlen * LIMIT_DELTA2TEXT < deltainfo.compresseddeltalen:
578 578 return False
579 579
580 580 # Bad delta from chain length:
581 581 #
582 582 # If the number of deltas in the chain gets too high.
583 583 if (revlog._maxchainlen
584 584 and revlog._maxchainlen < deltainfo.chainlen):
585 585 return False
586 586
587 587 # bad delta from intermediate snapshot size limit
588 588 #
589 589 # If an intermediate snapshot size is higher than the limit. The
590 590 # limit exists to prevent endless chains of intermediate deltas
591 591 # from being created.
592 592 if (deltainfo.snapshotdepth is not None and
593 593 (textlen >> deltainfo.snapshotdepth) < deltainfo.deltalen):
594 594 return False
595 595
596 596 # bad delta if new intermediate snapshot is larger than the previous
597 597 # snapshot
598 598 if (deltainfo.snapshotdepth
599 599 and revlog.length(deltainfo.base) < deltainfo.deltalen):
600 600 return False
601 601
602 602 return True
603 603
604 # If a revision's full text is that much bigger than a base candidate's
605 # full text, it is very unlikely to produce a valid delta. We no longer
606 # consider these candidates.
607 LIMIT_BASE2TEXT = 50
608
604 609 def _candidategroups(revlog, textlen, p1, p2, cachedelta):
605 610 """Provides group of revision to be tested as delta base
606 611
607 612 This top level function focuses on emitting groups with unique and worthwhile
608 613 content. See _rawgroups for details about the group order.
609 614 """
610 615 # should we try to build a delta?
611 616 if not (len(revlog) and revlog._storedeltachains):
612 617 yield None
613 618 return
614 619
615 620 deltalength = revlog.length
616 621 deltaparent = revlog.deltaparent
622 sparse = revlog._sparserevlog
617 623 good = None
618 624
619 625 deltas_limit = textlen * LIMIT_DELTA2TEXT
620 626
621 627 tested = set([nullrev])
622 628 candidates = _refinedgroups(revlog, p1, p2, cachedelta)
623 629 while True:
624 630 temptative = candidates.send(good)
625 631 if temptative is None:
626 632 break
627 633 group = []
628 634 for rev in temptative:
629 635 # skip over empty deltas (no need to include them in a chain)
630 636 while (revlog._generaldelta
631 637 and not (rev == nullrev
632 638 or rev in tested
633 639 or deltalength(rev))):
634 640 tested.add(rev)
635 641 rev = deltaparent(rev)
636 642 # no need to try a delta against nullrev, this will be done as a
637 643 # last resort.
638 644 if rev == nullrev:
639 645 continue
640 646 # filter out revisions we tested already
641 647 if rev in tested:
642 648 continue
643 649 tested.add(rev)
644 650 # filter out delta bases that will never produce a good delta
645 651 if deltas_limit < revlog.length(rev):
646 652 continue
653 if sparse and revlog.rawsize(rev) < (textlen // LIMIT_BASE2TEXT):
654 continue
647 655 # no delta for rawtext-changing revs (see "candelta" for why)
648 656 if revlog.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
649 657 continue
650 658 group.append(rev)
651 659 if group:
652 660 # XXX: in the sparse revlog case, group can become large,
653 661 # impacting performance. Some bounding or slicing mechanism
654 662 # would help to reduce this impact.
655 663 good = yield tuple(group)
656 664 yield None
657 665
658 666 def _findsnapshots(revlog, cache, start_rev):
659 667 """find snapshot from start_rev to tip"""
660 668 deltaparent = revlog.deltaparent
661 669 issnapshot = revlog.issnapshot
662 670 for rev in revlog.revs(start_rev):
663 671 if issnapshot(rev):
664 672 cache[deltaparent(rev)].append(rev)
665 673
666 674 def _refinedgroups(revlog, p1, p2, cachedelta):
667 675 good = None
667 675 # First we try to reuse the delta contained in the bundle.
669 677 # (or from the source revlog)
670 678 #
671 679 # This logic only applies to general delta repositories and can be disabled
671 679 # through configuration. Disabling source-delta reuse is useful when
672 680 # we want to make sure we recompute "optimal" deltas.
674 682 if cachedelta and revlog._generaldelta and revlog._lazydeltabase:
675 683 # Assume what we received from the server is a good choice
676 684 # build delta will reuse the cache
677 685 good = yield (cachedelta[0],)
678 686 if good is not None:
679 687 yield None
680 688 return
681 689 for candidates in _rawgroups(revlog, p1, p2, cachedelta):
682 690 good = yield candidates
683 691 if good is not None:
684 692 break
685 693
686 694 # If sparse revlog is enabled, we can try to refine the available deltas
687 695 if not revlog._sparserevlog:
688 696 yield None
689 697 return
690 698
691 699 # if we have a refinable value, try to refine it
692 700 if good is not None and good not in (p1, p2) and revlog.issnapshot(good):
693 701 # refine snapshot down
694 702 previous = None
695 703 while previous != good:
696 704 previous = good
697 705 base = revlog.deltaparent(good)
698 706 if base == nullrev:
699 707 break
700 708 good = yield (base,)
701 709 # refine snapshot up
702 710 #
703 711 # XXX the _findsnapshots call can be expensive and is "duplicated" with
704 712 # the one done in `_rawgroups`. Once we start working on performance,
705 713 # we should make the two code paths share this computation.
706 714 snapshots = collections.defaultdict(list)
707 715 _findsnapshots(revlog, snapshots, good + 1)
708 716 previous = None
709 717 while good != previous:
710 718 previous = good
711 719 children = tuple(sorted(c for c in snapshots[good]))
712 720 good = yield children
713 721
714 722 # we have found nothing
715 723 yield None
716 724
717 725 def _rawgroups(revlog, p1, p2, cachedelta):
718 726 """Provides group of revision to be tested as delta base
719 727
718 726 This lower level function focuses on emitting deltas that are theoretically
719 727 interesting, without looking at any practical details.
722 730
723 731 The group order aims at providing fast or small candidates first.
724 732 """
725 733 gdelta = revlog._generaldelta
726 734 sparse = revlog._sparserevlog
727 735 curr = len(revlog)
728 736 prev = curr - 1
729 737 deltachain = lambda rev: revlog._deltachain(rev)[0]
730 738
731 739 if gdelta:
732 740 # exclude any base we already lazily tested
733 741 parents = [p for p in (p1, p2) if p != nullrev]
734 742
735 743 if not revlog._deltabothparents and len(parents) == 2:
736 744 parents.sort()
737 745 # To minimize the chance of having to build a fulltext,
738 746 # pick first whichever parent is closest to us (max rev)
739 747 yield (parents[1],)
740 748 # then the other one (min rev) if the first did not fit
741 749 yield (parents[0],)
742 750 elif len(parents) > 0:
743 751 # Test all parents (1 or 2), and keep the best candidate
744 752 yield parents
745 753
746 754 if sparse and parents:
747 755 snapshots = collections.defaultdict(list) # map: base-rev: snapshot-rev
748 756 # See if an existing snapshot in the parent chains can be used as
749 757 # a base for a new intermediate snapshot
750 758 #
751 759 # search for snapshots in the parents' delta chains
752 760 # map: snapshot-level: snapshot-rev
753 761 parents_snaps = collections.defaultdict(set)
754 762 candidate_chains = [deltachain(p) for p in parents]
755 763 for chain in candidate_chains:
756 764 for idx, s in enumerate(chain):
757 765 if not revlog.issnapshot(s):
758 766 break
759 767 parents_snaps[idx].add(s)
760 768 snapfloor = min(parents_snaps[0]) + 1
761 769 _findsnapshots(revlog, snapshots, snapfloor)
762 770 # search for the highest "unrelated" revision
763 771 #
764 772 # Adding snapshots used by "unrelated" revisions increases the odds that
765 773 # we reuse an independent, yet better, snapshot chain.
766 774 #
767 775 # XXX instead of building a set of revisions, we could lazily enumerate
768 776 # over the chains. That would be more efficient, however we stick to
769 777 # simple code for now.
770 778 all_revs = set()
771 779 for chain in candidate_chains:
772 780 all_revs.update(chain)
773 781 other = None
774 782 for r in revlog.revs(prev, snapfloor):
775 783 if r not in all_revs:
776 784 other = r
777 785 break
778 786 if other is not None:
779 787 # To avoid unfair competition, we won't use unrelated intermediate
780 788 # snapshots that are deeper than the ones from the parent delta
781 789 # chain.
782 790 max_depth = max(parents_snaps.keys())
783 791 chain = deltachain(other)
784 792 for idx, s in enumerate(chain):
785 793 if s < snapfloor:
786 794 continue
787 795 if max_depth < idx:
788 796 break
789 797 if not revlog.issnapshot(s):
790 798 break
791 799 parents_snaps[idx].add(s)
792 800 # Test them as possible intermediate snapshot bases.
793 801 # We test them from highest to lowest level. High level ones are more
794 802 # likely to result in small deltas.
795 803 floor = None
796 804 for idx, snaps in sorted(parents_snaps.items(), reverse=True):
797 805 siblings = set()
798 806 for s in snaps:
799 807 siblings.update(snapshots[s])
800 808 # Before considering making a new intermediate snapshot, we check
801 809 # if an existing snapshot, a child of the base we consider, would
802 810 # be suitable.
803 811 #
804 812 # It gives a chance to reuse a delta chain "unrelated" to the
805 813 # current revision instead of starting our own. Without such
806 814 # reuse, topological branches would keep reopening new chains,
807 815 # creating more and more snapshots as the repository grows.
808 816
809 817 if floor is not None:
810 818 # We only do this for siblings created after the one in our
811 819 # parent's delta chain. Those created before have less chance
812 820 # of being a valid base, since our ancestors had to create a
813 821 # new snapshot.
814 822 siblings = [r for r in siblings if floor < r]
815 823 yield tuple(sorted(siblings))
816 824 # then test the base from our parent's delta chain.
817 825 yield tuple(sorted(snaps))
818 826 floor = min(snaps)
819 827 # No suitable base found in the parent chain; check whether any full
820 828 # snapshot emitted since the parent's base would be a suitable base
821 829 # for an intermediate snapshot.
822 830 #
823 831 # It gives a chance to reuse a delta chain unrelated to the current
824 832 # revision instead of starting our own. Without such reuse,
825 833 # topological branches would keep reopening new full chains, creating
826 834 # more and more snapshots as the repository grows.
827 835 yield tuple(snapshots[nullrev])
828 836
829 837 if not sparse:
830 838 # other approaches failed, try against prev to hopefully save us
831 839 # a fulltext.
832 840 yield (prev,)
833 841
834 842 class deltacomputer(object):
835 843 def __init__(self, revlog):
836 844 self.revlog = revlog
837 845
838 846 def buildtext(self, revinfo, fh):
839 847 """Builds a fulltext version of a revision
840 848
841 849 revinfo: _revisioninfo instance that contains all needed info
842 850 fh: file handle to either the .i or the .d revlog file,
843 851 depending on whether it is inlined or not
844 852 """
845 853 btext = revinfo.btext
846 854 if btext[0] is not None:
847 855 return btext[0]
848 856
849 857 revlog = self.revlog
850 858 cachedelta = revinfo.cachedelta
851 859 baserev = cachedelta[0]
852 860 delta = cachedelta[1]
853 861
854 862 fulltext = btext[0] = _textfromdelta(fh, revlog, baserev, delta,
855 863 revinfo.p1, revinfo.p2,
856 864 revinfo.flags, revinfo.node)
857 865 return fulltext
858 866
859 867 def _builddeltadiff(self, base, revinfo, fh):
860 868 revlog = self.revlog
861 869 t = self.buildtext(revinfo, fh)
862 870 if revlog.iscensored(base):
863 871 # deltas based on a censored revision must replace the
864 872 # full content in one patch, so delta works everywhere
865 873 header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
866 874 delta = header + t
867 875 else:
868 876 ptext = revlog.revision(base, _df=fh, raw=True)
869 877 delta = mdiff.textdiff(ptext, t)
870 878
871 879 return delta
872 880
873 881 def _builddeltainfo(self, revinfo, base, fh):
874 882 # can we use the cached delta?
875 883 delta = None
876 884 if revinfo.cachedelta:
877 885 cachebase, cachediff = revinfo.cachedelta
878 886 # check if the diff still applies
879 887 currentbase = cachebase
880 888 while (currentbase != nullrev
881 889 and currentbase != base
882 890 and self.revlog.length(currentbase) == 0):
883 891 currentbase = self.revlog.deltaparent(currentbase)
884 892 if currentbase == base:
885 893 delta = revinfo.cachedelta[1]
886 894 if delta is None:
887 895 delta = self._builddeltadiff(base, revinfo, fh)
888 896 revlog = self.revlog
889 897 header, data = revlog.compress(delta)
890 898 deltalen = len(header) + len(data)
891 899 chainbase = revlog.chainbase(base)
892 900 offset = revlog.end(len(revlog) - 1)
893 901 dist = deltalen + offset - revlog.start(chainbase)
894 902 if revlog._generaldelta:
895 903 deltabase = base
896 904 else:
897 905 deltabase = chainbase
898 906 chainlen, compresseddeltalen = revlog._chaininfo(base)
899 907 chainlen += 1
900 908 compresseddeltalen += deltalen
901 909
902 910 revlog = self.revlog
903 911 snapshotdepth = None
904 912 if deltabase == nullrev:
905 913 snapshotdepth = 0
906 914 elif revlog._sparserevlog and revlog.issnapshot(deltabase):
907 915 # A delta chain should always be one full snapshot,
908 916 # zero or more semi-snapshots, and zero or more deltas
909 917 p1, p2 = revlog.rev(revinfo.p1), revlog.rev(revinfo.p2)
910 918 if deltabase not in (p1, p2) and revlog.issnapshot(deltabase):
911 919 snapshotdepth = len(revlog._deltachain(deltabase)[0])
912 920
913 921 return _deltainfo(dist, deltalen, (header, data), deltabase,
914 922 chainbase, chainlen, compresseddeltalen,
915 923 snapshotdepth)
916 924
917 925 def _fullsnapshotinfo(self, fh, revinfo):
918 926 curr = len(self.revlog)
919 927 rawtext = self.buildtext(revinfo, fh)
920 928 data = self.revlog.compress(rawtext)
921 929 compresseddeltalen = deltalen = dist = len(data[1]) + len(data[0])
922 930 deltabase = chainbase = curr
923 931 snapshotdepth = 0
924 932 chainlen = 1
925 933
926 934 return _deltainfo(dist, deltalen, data, deltabase,
927 935 chainbase, chainlen, compresseddeltalen,
928 936 snapshotdepth)
929 937
930 938 def finddeltainfo(self, revinfo, fh):
931 939 """Find an acceptable delta against a candidate revision
932 940
933 941 revinfo: information about the revision (instance of _revisioninfo)
934 942 fh: file handle to either the .i or the .d revlog file,
935 943 depending on whether it is inlined or not
936 944
937 945 Returns the first acceptable candidate revision, as ordered by
938 946 _candidategroups
939 947
940 948 If no suitable deltabase is found, we return delta info for a full
941 949 snapshot.
942 950 """
943 951 if not revinfo.textlen:
944 952 return self._fullsnapshotinfo(fh, revinfo)
945 953
946 954 # no delta for flag processor revisions (see "candelta" for why)
947 955 # not calling candelta since only one revision needs testing, also to
948 956 # avoid the overhead of fetching flags again.
949 957 if revinfo.flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
950 958 return self._fullsnapshotinfo(fh, revinfo)
951 959
952 960 cachedelta = revinfo.cachedelta
953 961 p1 = revinfo.p1
954 962 p2 = revinfo.p2
955 963 revlog = self.revlog
956 964
957 965 deltainfo = None
958 966 p1r, p2r = revlog.rev(p1), revlog.rev(p2)
959 967 groups = _candidategroups(self.revlog, revinfo.textlen,
960 968 p1r, p2r, cachedelta)
961 969 candidaterevs = next(groups)
962 970 while candidaterevs is not None:
963 971 nominateddeltas = []
964 972 if deltainfo is not None:
965 973 # if we already found a good delta,
966 974 # challenge it against refined candidates
967 975 nominateddeltas.append(deltainfo)
968 976 for candidaterev in candidaterevs:
969 977 candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh)
970 978 if isgooddeltainfo(self.revlog, candidatedelta, revinfo):
971 979 nominateddeltas.append(candidatedelta)
972 980 if nominateddeltas:
973 981 deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
974 982 if deltainfo is not None:
975 983 candidaterevs = groups.send(deltainfo.base)
976 984 else:
977 985 candidaterevs = next(groups)
978 986
979 987 if deltainfo is None:
980 988 deltainfo = self._fullsnapshotinfo(fh, revinfo)
981 989 return deltainfo
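The hunk below updates the stream-clone test whose expected output shifts as a side effect of the new filter: with very small bases excluded, some revisions presumably end up stored against a different base, so the transferred totals move from 96.4 KB to 96.5 KB, the stream2 bytecount from 98758 to 98775, and the bundle2 payload from 112077 to 112094 bytes.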
@@ -1,567 +1,567 b''
1 1 #require serve no-reposimplestore no-chg
2 2
3 3 #testcases stream-legacy stream-bundle2
4 4
5 5 #if stream-legacy
6 6 $ cat << EOF >> $HGRCPATH
7 7 > [server]
8 8 > bundle2.stream = no
9 9 > EOF
10 10 #endif
11 11
12 12 Initialize repository
13 13 The status call is to check for issue5130
14 14
15 15 $ hg init server
16 16 $ cd server
17 17 $ touch foo
18 18 $ hg -q commit -A -m initial
19 19 >>> for i in range(1024):
20 20 ... with open(str(i), 'wb') as fh:
21 21 ... fh.write(b"%d" % i) and None
22 22 $ hg -q commit -A -m 'add a lot of files'
23 23 $ hg st
24 24 $ hg --config server.uncompressed=false serve -p $HGPORT -d --pid-file=hg.pid
25 25 $ cat hg.pid > $DAEMON_PIDS
26 26 $ cd ..
27 27
28 28 Cannot stream clone when server.uncompressed is set
29 29
30 30 $ get-with-headers.py $LOCALIP:$HGPORT '?cmd=stream_out'
31 31 200 Script output follows
32 32
33 33 1
34 34
35 35 #if stream-legacy
36 36 $ hg debugcapabilities http://localhost:$HGPORT
37 37 Main capabilities:
38 38 batch
39 39 branchmap
40 40 $USUAL_BUNDLE2_CAPS_SERVER$
41 41 changegroupsubset
42 42 compression=$BUNDLE2_COMPRESSIONS$
43 43 getbundle
44 44 httpheader=1024
45 45 httpmediatype=0.1rx,0.1tx,0.2tx
46 46 known
47 47 lookup
48 48 pushkey
49 49 unbundle=HG10GZ,HG10BZ,HG10UN
50 50 unbundlehash
51 51 Bundle2 capabilities:
52 52 HG20
53 53 bookmarks
54 54 changegroup
55 55 01
56 56 02
57 57 digests
58 58 md5
59 59 sha1
60 60 sha512
61 61 error
62 62 abort
63 63 unsupportedcontent
64 64 pushraced
65 65 pushkey
66 66 hgtagsfnodes
67 67 listkeys
68 68 phases
69 69 heads
70 70 pushkey
71 71 remote-changegroup
72 72 http
73 73 https
74 74 rev-branch-cache
75 75
76 76 $ hg clone --stream -U http://localhost:$HGPORT server-disabled
77 77 warning: stream clone requested but server has them disabled
78 78 requesting all changes
79 79 adding changesets
80 80 adding manifests
81 81 adding file changes
82 82 added 2 changesets with 1025 changes to 1025 files
83 83 new changesets 96ee1d7354c4:c17445101a72
84 84
85 85 $ get-with-headers.py $LOCALIP:$HGPORT '?cmd=getbundle' content-type --bodyfile body --hgproto 0.2 --requestheader "x-hgarg-1=bundlecaps=HG20%2Cbundle2%3DHG20%250Abookmarks%250Achangegroup%253D01%252C02%250Adigests%253Dmd5%252Csha1%252Csha512%250Aerror%253Dabort%252Cunsupportedcontent%252Cpushraced%252Cpushkey%250Ahgtagsfnodes%250Alistkeys%250Aphases%253Dheads%250Apushkey%250Aremote-changegroup%253Dhttp%252Chttps&cg=0&common=0000000000000000000000000000000000000000&heads=c17445101a72edac06facd130d14808dfbd5c7c2&stream=1"
86 86 200 Script output follows
87 87 content-type: application/mercurial-0.2
88 88
89 89
90 90 $ f --size body --hexdump --bytes 100
91 91 body: size=232
92 92 0000: 04 6e 6f 6e 65 48 47 32 30 00 00 00 00 00 00 00 |.noneHG20.......|
93 93 0010: cf 0b 45 52 52 4f 52 3a 41 42 4f 52 54 00 00 00 |..ERROR:ABORT...|
94 94 0020: 00 01 01 07 3c 04 72 6d 65 73 73 61 67 65 73 74 |....<.rmessagest|
95 95 0030: 72 65 61 6d 20 64 61 74 61 20 72 65 71 75 65 73 |ream data reques|
96 96 0040: 74 65 64 20 62 75 74 20 73 65 72 76 65 72 20 64 |ted but server d|
97 97 0050: 6f 65 73 20 6e 6f 74 20 61 6c 6c 6f 77 20 74 68 |oes not allow th|
98 98 0060: 69 73 20 66 |is f|
99 99
100 100 #endif
101 101 #if stream-bundle2
102 102 $ hg debugcapabilities http://localhost:$HGPORT
103 103 Main capabilities:
104 104 batch
105 105 branchmap
106 106 $USUAL_BUNDLE2_CAPS_SERVER$
107 107 changegroupsubset
108 108 compression=$BUNDLE2_COMPRESSIONS$
109 109 getbundle
110 110 httpheader=1024
111 111 httpmediatype=0.1rx,0.1tx,0.2tx
112 112 known
113 113 lookup
114 114 pushkey
115 115 unbundle=HG10GZ,HG10BZ,HG10UN
116 116 unbundlehash
117 117 Bundle2 capabilities:
118 118 HG20
119 119 bookmarks
120 120 changegroup
121 121 01
122 122 02
123 123 digests
124 124 md5
125 125 sha1
126 126 sha512
127 127 error
128 128 abort
129 129 unsupportedcontent
130 130 pushraced
131 131 pushkey
132 132 hgtagsfnodes
133 133 listkeys
134 134 phases
135 135 heads
136 136 pushkey
137 137 remote-changegroup
138 138 http
139 139 https
140 140 rev-branch-cache
141 141
142 142 $ hg clone --stream -U http://localhost:$HGPORT server-disabled
143 143 warning: stream clone requested but server has them disabled
144 144 requesting all changes
145 145 adding changesets
146 146 adding manifests
147 147 adding file changes
148 148 added 2 changesets with 1025 changes to 1025 files
149 149 new changesets 96ee1d7354c4:c17445101a72
150 150
151 151 $ get-with-headers.py $LOCALIP:$HGPORT '?cmd=getbundle' content-type --bodyfile body --hgproto 0.2 --requestheader "x-hgarg-1=bundlecaps=HG20%2Cbundle2%3DHG20%250Abookmarks%250Achangegroup%253D01%252C02%250Adigests%253Dmd5%252Csha1%252Csha512%250Aerror%253Dabort%252Cunsupportedcontent%252Cpushraced%252Cpushkey%250Ahgtagsfnodes%250Alistkeys%250Aphases%253Dheads%250Apushkey%250Aremote-changegroup%253Dhttp%252Chttps&cg=0&common=0000000000000000000000000000000000000000&heads=c17445101a72edac06facd130d14808dfbd5c7c2&stream=1"
152 152 200 Script output follows
153 153 content-type: application/mercurial-0.2
154 154
155 155
156 156 $ f --size body --hexdump --bytes 100
157 157 body: size=232
158 158 0000: 04 6e 6f 6e 65 48 47 32 30 00 00 00 00 00 00 00 |.noneHG20.......|
159 159 0010: cf 0b 45 52 52 4f 52 3a 41 42 4f 52 54 00 00 00 |..ERROR:ABORT...|
160 160 0020: 00 01 01 07 3c 04 72 6d 65 73 73 61 67 65 73 74 |....<.rmessagest|
161 161 0030: 72 65 61 6d 20 64 61 74 61 20 72 65 71 75 65 73 |ream data reques|
162 162 0040: 74 65 64 20 62 75 74 20 73 65 72 76 65 72 20 64 |ted but server d|
163 163 0050: 6f 65 73 20 6e 6f 74 20 61 6c 6c 6f 77 20 74 68 |oes not allow th|
164 164 0060: 69 73 20 66 |is f|
165 165
166 166 #endif
167 167
168 168 $ killdaemons.py
169 169 $ cd server
170 170 $ hg serve -p $HGPORT -d --pid-file=hg.pid
171 171 $ cat hg.pid > $DAEMON_PIDS
172 172 $ cd ..
173 173
174 174 Basic clone
175 175
176 176 #if stream-legacy
177 177 $ hg clone --stream -U http://localhost:$HGPORT clone1
178 178 streaming all changes
179 179 1027 files to transfer, 96.3 KB of data
180 180 transferred 96.3 KB in * seconds (*/sec) (glob)
181 181 searching for changes
182 182 no changes found
183 183 #endif
184 184 #if stream-bundle2
185 185 $ hg clone --stream -U http://localhost:$HGPORT clone1
186 186 streaming all changes
187 1030 files to transfer, 96.4 KB of data
188 transferred 96.4 KB in * seconds (* */sec) (glob)
187 1030 files to transfer, 96.5 KB of data
188 transferred 96.5 KB in * seconds (* */sec) (glob)
189 189
190 190 $ ls -1 clone1/.hg/cache
191 191 branch2-served
192 192 rbc-names-v1
193 193 rbc-revs-v1
194 194 #endif
195 195
196 196 getbundle requests with stream=1 are uncompressed
197 197
198 198 $ get-with-headers.py $LOCALIP:$HGPORT '?cmd=getbundle' content-type --bodyfile body --hgproto '0.1 0.2 comp=zlib,none' --requestheader "x-hgarg-1=bundlecaps=HG20%2Cbundle2%3DHG20%250Abookmarks%250Achangegroup%253D01%252C02%250Adigests%253Dmd5%252Csha1%252Csha512%250Aerror%253Dabort%252Cunsupportedcontent%252Cpushraced%252Cpushkey%250Ahgtagsfnodes%250Alistkeys%250Aphases%253Dheads%250Apushkey%250Aremote-changegroup%253Dhttp%252Chttps&cg=0&common=0000000000000000000000000000000000000000&heads=c17445101a72edac06facd130d14808dfbd5c7c2&stream=1"
199 199 200 Script output follows
200 200 content-type: application/mercurial-0.2
201 201
202 202
203 203 $ f --size --hex --bytes 256 body
204 body: size=112245
204 body: size=112262
205 205 0000: 04 6e 6f 6e 65 48 47 32 30 00 00 00 00 00 00 00 |.noneHG20.......|
206 206 0010: 7f 07 53 54 52 45 41 4d 32 00 00 00 00 03 00 09 |..STREAM2.......|
207 207 0020: 05 09 04 0c 44 62 79 74 65 63 6f 75 6e 74 39 38 |....Dbytecount98|
208 0030: 37 35 38 66 69 6c 65 63 6f 75 6e 74 31 30 33 30 |758filecount1030|
208 0030: 37 37 35 66 69 6c 65 63 6f 75 6e 74 31 30 33 30 |775filecount1030|
209 209 0040: 72 65 71 75 69 72 65 6d 65 6e 74 73 64 6f 74 65 |requirementsdote|
210 210 0050: 6e 63 6f 64 65 25 32 43 66 6e 63 61 63 68 65 25 |ncode%2Cfncache%|
211 211 0060: 32 43 67 65 6e 65 72 61 6c 64 65 6c 74 61 25 32 |2Cgeneraldelta%2|
212 212 0070: 43 72 65 76 6c 6f 67 76 31 25 32 43 73 70 61 72 |Crevlogv1%2Cspar|
213 213 0080: 73 65 72 65 76 6c 6f 67 25 32 43 73 74 6f 72 65 |serevlog%2Cstore|
214 214 0090: 00 00 80 00 73 08 42 64 61 74 61 2f 30 2e 69 00 |....s.Bdata/0.i.|
215 215 00a0: 03 00 01 00 00 00 00 00 00 00 02 00 00 00 01 00 |................|
216 216 00b0: 00 00 00 00 00 00 01 ff ff ff ff ff ff ff ff 80 |................|
217 217 00c0: 29 63 a0 49 d3 23 87 bf ce fe 56 67 92 67 2c 69 |)c.I.#....Vg.g,i|
218 218 00d0: d1 ec 39 00 00 00 00 00 00 00 00 00 00 00 00 75 |..9............u|
219 219 00e0: 30 73 08 42 64 61 74 61 2f 31 2e 69 00 03 00 01 |0s.Bdata/1.i....|
220 220 00f0: 00 00 00 00 00 00 00 02 00 00 00 01 00 00 00 00 |................|
221 221
222 222 --uncompressed is an alias to --stream
223 223
224 224 #if stream-legacy
225 225 $ hg clone --uncompressed -U http://localhost:$HGPORT clone1-uncompressed
226 226 streaming all changes
227 227 1027 files to transfer, 96.3 KB of data
228 228 transferred 96.3 KB in * seconds (*/sec) (glob)
229 229 searching for changes
230 230 no changes found
231 231 #endif
232 232 #if stream-bundle2
233 233 $ hg clone --uncompressed -U http://localhost:$HGPORT clone1-uncompressed
234 234 streaming all changes
235 1030 files to transfer, 96.4 KB of data
236 transferred 96.4 KB in * seconds (* */sec) (glob)
235 1030 files to transfer, 96.5 KB of data
236 transferred 96.5 KB in * seconds (* */sec) (glob)
237 237 #endif
238 238
239 239 Clone with background file closing enabled
240 240
241 241 #if stream-legacy
242 242 $ hg --debug --config worker.backgroundclose=true --config worker.backgroundcloseminfilecount=1 clone --stream -U http://localhost:$HGPORT clone-background | grep -v adding
243 243 using http://localhost:$HGPORT/
244 244 sending capabilities command
245 245 sending branchmap command
246 246 streaming all changes
247 247 sending stream_out command
248 248 1027 files to transfer, 96.3 KB of data
249 249 starting 4 threads for background file closing
250 250 updating the branch cache
251 251 transferred 96.3 KB in * seconds (*/sec) (glob)
252 252 query 1; heads
253 253 sending batch command
254 254 searching for changes
255 255 all remote heads known locally
256 256 no changes found
257 257 sending getbundle command
258 258 bundle2-input-bundle: with-transaction
259 259 bundle2-input-part: "listkeys" (params: 1 mandatory) supported
260 260 bundle2-input-part: "phase-heads" supported
261 261 bundle2-input-part: total payload size 24
262 262 bundle2-input-bundle: 1 parts total
263 263 checking for updated bookmarks
264 264 (sent 5 HTTP requests and * bytes; received * bytes in responses) (glob)
265 265 #endif
266 266 #if stream-bundle2
267 267 $ hg --debug --config worker.backgroundclose=true --config worker.backgroundcloseminfilecount=1 clone --stream -U http://localhost:$HGPORT clone-background | grep -v adding
268 268 using http://localhost:$HGPORT/
269 269 sending capabilities command
270 270 query 1; heads
271 271 sending batch command
272 272 streaming all changes
273 273 sending getbundle command
274 274 bundle2-input-bundle: with-transaction
275 275 bundle2-input-part: "stream2" (params: 3 mandatory) supported
276 276 applying stream bundle
277 1030 files to transfer, 96.4 KB of data
277 1030 files to transfer, 96.5 KB of data
278 278 starting 4 threads for background file closing
279 279 starting 4 threads for background file closing
280 280 updating the branch cache
281 transferred 96.4 KB in * seconds (* */sec) (glob)
282 bundle2-input-part: total payload size 112077
281 transferred 96.5 KB in * seconds (* */sec) (glob)
282 bundle2-input-part: total payload size 112094
283 283 bundle2-input-part: "listkeys" (params: 1 mandatory) supported
284 284 bundle2-input-bundle: 1 parts total
285 285 checking for updated bookmarks
286 286 (sent 3 HTTP requests and * bytes; received * bytes in responses) (glob)
287 287 #endif
288 288
289 289 Cannot stream clone when there are secret changesets
290 290
291 291 $ hg -R server phase --force --secret -r tip
292 292 $ hg clone --stream -U http://localhost:$HGPORT secret-denied
293 293 warning: stream clone requested but server has them disabled
294 294 requesting all changes
295 295 adding changesets
296 296 adding manifests
297 297 adding file changes
298 298 added 1 changesets with 1 changes to 1 files
299 299 new changesets 96ee1d7354c4
300 300
301 301 $ killdaemons.py
302 302
303 303 Streaming of secrets can be overridden by server config
304 304
305 305 $ cd server
306 306 $ hg serve --config server.uncompressedallowsecret=true -p $HGPORT -d --pid-file=hg.pid
307 307 $ cat hg.pid > $DAEMON_PIDS
308 308 $ cd ..
309 309
310 310 #if stream-legacy
311 311 $ hg clone --stream -U http://localhost:$HGPORT secret-allowed
312 312 streaming all changes
313 313 1027 files to transfer, 96.3 KB of data
314 314 transferred 96.3 KB in * seconds (*/sec) (glob)
315 315 searching for changes
316 316 no changes found
317 317 #endif
318 318 #if stream-bundle2
319 319 $ hg clone --stream -U http://localhost:$HGPORT secret-allowed
320 320 streaming all changes
321 1030 files to transfer, 96.4 KB of data
322 transferred 96.4 KB in * seconds (* */sec) (glob)
321 1030 files to transfer, 96.5 KB of data
322 transferred 96.5 KB in * seconds (* */sec) (glob)
323 323 #endif
324 324
325 325 $ killdaemons.py
326 326
327 327 Verify interaction between preferuncompressed and secret presence
328 328
329 329 $ cd server
330 330 $ hg serve --config server.preferuncompressed=true -p $HGPORT -d --pid-file=hg.pid
331 331 $ cat hg.pid > $DAEMON_PIDS
332 332 $ cd ..
333 333
334 334 $ hg clone -U http://localhost:$HGPORT preferuncompressed-secret
335 335 requesting all changes
336 336 adding changesets
337 337 adding manifests
338 338 adding file changes
339 339 added 1 changesets with 1 changes to 1 files
340 340 new changesets 96ee1d7354c4
341 341
342 342 $ killdaemons.py
343 343
344 344 Clone not allowed when full bundles disabled and can't serve secrets
345 345
346 346 $ cd server
347 347 $ hg serve --config server.disablefullbundle=true -p $HGPORT -d --pid-file=hg.pid
348 348 $ cat hg.pid > $DAEMON_PIDS
349 349 $ cd ..
350 350
351 351 $ hg clone --stream http://localhost:$HGPORT secret-full-disabled
352 352 warning: stream clone requested but server has them disabled
353 353 requesting all changes
354 354 remote: abort: server has pull-based clones disabled
355 355 abort: pull failed on remote
356 356 (remove --pull if specified or upgrade Mercurial)
357 357 [255]
358 358
359 359 Local stream clone with secrets involved
360 360 (This is just a test of behavior: if you have access to the repo's files,
361 361 there is no security, so it isn't important to prevent a clone here.)
362 362
363 363 $ hg clone -U --stream server local-secret
364 364 warning: stream clone requested but server has them disabled
365 365 requesting all changes
366 366 adding changesets
367 367 adding manifests
368 368 adding file changes
369 369 added 1 changesets with 1 changes to 1 files
370 370 new changesets 96ee1d7354c4
371 371
372 372 Stream clone while repo is changing:
373 373
374 374 $ mkdir changing
375 375 $ cd changing
376 376
377 377 extension for delaying the server process so we can reliably modify the repo
378 378 while cloning
379 379
380 380 $ cat > delayer.py <<EOF
381 381 > import time
382 382 > from mercurial import extensions, vfs
383 383 > def __call__(orig, self, path, *args, **kwargs):
384 384 > if path == 'data/f1.i':
385 385 > time.sleep(2)
386 386 > return orig(self, path, *args, **kwargs)
387 387 > extensions.wrapfunction(vfs.vfs, '__call__', __call__)
388 388 > EOF
389 389
390 390 prepare repo with small and big file to cover both code paths in emitrevlogdata
391 391
392 392 $ hg init repo
393 393 $ touch repo/f1
394 394 $ $TESTDIR/seq.py 50000 > repo/f2
395 395 $ hg -R repo ci -Aqm "0"
396 396 $ hg serve -R repo -p $HGPORT1 -d --pid-file=hg.pid --config extensions.delayer=delayer.py
397 397 $ cat hg.pid >> $DAEMON_PIDS
398 398
399 399 clone while modifying the repo between stat-ing the file under the write lock
400 400 and actually serving the file content
401 401
402 402 $ hg clone -q --stream -U http://localhost:$HGPORT1 clone &
403 403 $ sleep 1
404 404 $ echo >> repo/f1
405 405 $ echo >> repo/f2
406 406 $ hg -R repo ci -m "1"
407 407 $ wait
408 408 $ hg -R clone id
409 409 000000000000
410 410 $ cd ..
411 411
412 412 Stream repository with bookmarks
413 413 --------------------------------
414 414
415 415 (revert introduction of secret changeset)
416 416
417 417 $ hg -R server phase --draft 'secret()'
418 418
419 419 add a bookmark
420 420
421 421 $ hg -R server bookmark -r tip some-bookmark
422 422
423 423 clone it
424 424
425 425 #if stream-legacy
426 426 $ hg clone --stream http://localhost:$HGPORT with-bookmarks
427 427 streaming all changes
428 428 1027 files to transfer, 96.3 KB of data
429 429 transferred 96.3 KB in * seconds (*) (glob)
430 430 searching for changes
431 431 no changes found
432 432 updating to branch default
433 433 1025 files updated, 0 files merged, 0 files removed, 0 files unresolved
434 434 #endif
435 435 #if stream-bundle2
436 436 $ hg clone --stream http://localhost:$HGPORT with-bookmarks
437 437 streaming all changes
438 438 1033 files to transfer, 96.6 KB of data
439 439 transferred 96.6 KB in * seconds (* */sec) (glob)
440 440 updating to branch default
441 441 1025 files updated, 0 files merged, 0 files removed, 0 files unresolved
442 442 #endif
443 443 $ hg -R with-bookmarks bookmarks
444 444 some-bookmark 1:c17445101a72
445 445
446 446 Stream repository with phases
447 447 -----------------------------
448 448
449 449 Clone as publishing
450 450
451 451 $ hg -R server phase -r 'all()'
452 452 0: draft
453 453 1: draft
454 454
455 455 #if stream-legacy
456 456 $ hg clone --stream http://localhost:$HGPORT phase-publish
457 457 streaming all changes
458 458 1027 files to transfer, 96.3 KB of data
459 459 transferred 96.3 KB in * seconds (*) (glob)
460 460 searching for changes
461 461 no changes found
462 462 updating to branch default
463 463 1025 files updated, 0 files merged, 0 files removed, 0 files unresolved
464 464 #endif
465 465 #if stream-bundle2
466 466 $ hg clone --stream http://localhost:$HGPORT phase-publish
467 467 streaming all changes
468 468 1033 files to transfer, 96.6 KB of data
469 469 transferred 96.6 KB in * seconds (* */sec) (glob)
470 470 updating to branch default
471 471 1025 files updated, 0 files merged, 0 files removed, 0 files unresolved
472 472 #endif
473 473 $ hg -R phase-publish phase -r 'all()'
474 474 0: public
475 475 1: public
476 476
477 477 Clone as non publishing
478 478
479 479 $ cat << EOF >> server/.hg/hgrc
480 480 > [phases]
481 481 > publish = False
482 482 > EOF
483 483 $ killdaemons.py
484 484 $ hg -R server serve -p $HGPORT -d --pid-file=hg.pid
485 485 $ cat hg.pid > $DAEMON_PIDS
486 486
487 487 #if stream-legacy
488 488
489 489 With v1 of the stream protocol, changesets are always cloned as public. That
490 490 makes stream v1 unsuitable for non-publishing repositories.
491 491
492 492 $ hg clone --stream http://localhost:$HGPORT phase-no-publish
493 493 streaming all changes
494 494 1027 files to transfer, 96.3 KB of data
495 495 transferred 96.3 KB in * seconds (*) (glob)
496 496 searching for changes
497 497 no changes found
498 498 updating to branch default
499 499 1025 files updated, 0 files merged, 0 files removed, 0 files unresolved
500 500 $ hg -R phase-no-publish phase -r 'all()'
501 501 0: public
502 502 1: public
503 503 #endif
504 504 #if stream-bundle2
505 505 $ hg clone --stream http://localhost:$HGPORT phase-no-publish
506 506 streaming all changes
507 507 1034 files to transfer, 96.7 KB of data
508 508 transferred 96.7 KB in * seconds (* */sec) (glob)
509 509 updating to branch default
510 510 1025 files updated, 0 files merged, 0 files removed, 0 files unresolved
511 511 $ hg -R phase-no-publish phase -r 'all()'
512 512 0: draft
513 513 1: draft
514 514 #endif
515 515
516 516 $ killdaemons.py
517 517
518 518 #if stream-legacy
519 519
520 520 With v1 of the stream protocol, changesets are always cloned as public. There's
521 521 no obsolescence-marker exchange in stream v1.
522 522
523 523 #endif
524 524 #if stream-bundle2
525 525
526 526 Stream repository with obsolescence
527 527 -----------------------------------
528 528
529 529 Clone non-publishing with obsolescence
530 530
531 531 $ cat >> $HGRCPATH << EOF
532 532 > [experimental]
533 533 > evolution=all
534 534 > EOF
535 535
536 536 $ cd server
537 537 $ echo foo > foo
538 538 $ hg -q commit -m 'about to be pruned'
539 539 $ hg debugobsolete `hg log -r . -T '{node}'` -d '0 0' -u test --record-parents
540 540 obsoleted 1 changesets
541 541 $ hg up null -q
542 542 $ hg log -T '{rev}: {phase}\n'
543 543 1: draft
544 544 0: draft
545 545 $ hg serve -p $HGPORT -d --pid-file=hg.pid
546 546 $ cat hg.pid > $DAEMON_PIDS
547 547 $ cd ..
548 548
549 549 $ hg clone -U --stream http://localhost:$HGPORT with-obsolescence
550 550 streaming all changes
551 551 1035 files to transfer, 97.1 KB of data
552 552 transferred 97.1 KB in * seconds (* */sec) (glob)
553 553 $ hg -R with-obsolescence log -T '{rev}: {phase}\n'
554 554 1: draft
555 555 0: draft
556 556 $ hg debugobsolete -R with-obsolescence
557 557 50382b884f66690b7045cac93a540cba4d4c906f 0 {c17445101a72edac06facd130d14808dfbd5c7c2} (Thu Jan 01 00:00:00 1970 +0000) {'user': 'test'}
558 558
559 559 $ hg clone -U --stream --config experimental.evolution=0 http://localhost:$HGPORT with-obsolescence-no-evolution
560 560 streaming all changes
561 561 remote: abort: server has obsolescence markers, but client cannot receive them via stream clone
562 562 abort: pull failed on remote
563 563 [255]
564 564
565 565 $ killdaemons.py
566 566
567 567 #endif