interfaces: create a new folder for interfaces and move repository.py in it...
Pulkit Goyal
r43078:268662aa default

The requested changes are too big and the content was truncated; only a partial diff is shown below.

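The common pattern across the files below is mechanical: `repository` is no longer imported from the `mercurial` package directly, but from the new `mercurial.interfaces` package. A minimal before/after sketch of that change, taken from the hunks below (everything else about the surrounding modules is unchanged):

    # before: interface definitions imported from the top-level package
    from mercurial import (
        repository,
    )

    # after: the same module now lives in the new interfaces package,
    # which the import checker also whitelists for direct symbol imports
    from mercurial.interfaces import (
        repository,
    )

Call sites such as `repository.REPO_FEATURE_LFS` stay the same, since only the import location changes.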
1 NO CONTENT: new file 100644
@@ -1,748 +1,749
1 1 #!/usr/bin/env python
2 2
3 3 from __future__ import absolute_import, print_function
4 4
5 5 import ast
6 6 import collections
7 7 import os
8 8 import sys
9 9
10 10 # Import a minimal set of stdlib modules needed for list_stdlib_modules()
11 11 # to work when run from a virtualenv. The modules were chosen empirically
12 12 # so that the return value matches the return value without virtualenv.
13 13 if True: # disable lexical sorting checks
14 14 try:
15 15 import BaseHTTPServer as basehttpserver
16 16 except ImportError:
17 17 basehttpserver = None
18 18 import zlib
19 19
20 20 import testparseutil
21 21
22 22 # Whitelist of modules that symbols can be directly imported from.
23 23 allowsymbolimports = (
24 24 '__future__',
25 25 'bzrlib',
26 26 'hgclient',
27 27 'mercurial',
28 28 'mercurial.hgweb.common',
29 29 'mercurial.hgweb.request',
30 30 'mercurial.i18n',
31 'mercurial.interfaces',
31 32 'mercurial.node',
32 33 # for revlog to re-export constant to extensions
33 34 'mercurial.revlogutils.constants',
34 35 'mercurial.revlogutils.flagutil',
35 36 # for cffi modules to re-export pure functions
36 37 'mercurial.pure.base85',
37 38 'mercurial.pure.bdiff',
38 39 'mercurial.pure.mpatch',
39 40 'mercurial.pure.osutil',
40 41 'mercurial.pure.parsers',
41 42 # third-party imports should be directly imported
42 43 'mercurial.thirdparty',
43 44 'mercurial.thirdparty.attr',
44 45 'mercurial.thirdparty.zope',
45 46 'mercurial.thirdparty.zope.interface',
46 47 )
47 48
48 49 # Whitelist of symbols that can be directly imported.
49 50 directsymbols = (
50 51 'demandimport',
51 52 )
52 53
53 54 # Modules that must be aliased because they are commonly confused with
54 55 # common variables and can create aliasing and readability issues.
55 56 requirealias = {
56 57 'ui': 'uimod',
57 58 }
58 59
59 60 def usingabsolute(root):
60 61 """Whether absolute imports are being used."""
61 62 if sys.version_info[0] >= 3:
62 63 return True
63 64
64 65 for node in ast.walk(root):
65 66 if isinstance(node, ast.ImportFrom):
66 67 if node.module == '__future__':
67 68 for n in node.names:
68 69 if n.name == 'absolute_import':
69 70 return True
70 71
71 72 return False
72 73
73 74 def walklocal(root):
74 75 """Recursively yield all descendant nodes but not in a different scope"""
75 76 todo = collections.deque(ast.iter_child_nodes(root))
76 77 yield root, False
77 78 while todo:
78 79 node = todo.popleft()
79 80 newscope = isinstance(node, ast.FunctionDef)
80 81 if not newscope:
81 82 todo.extend(ast.iter_child_nodes(node))
82 83 yield node, newscope
83 84
84 85 def dotted_name_of_path(path):
85 86 """Given a relative path to a source file, return its dotted module name.
86 87
87 88 >>> dotted_name_of_path('mercurial/error.py')
88 89 'mercurial.error'
89 90 >>> dotted_name_of_path('zlibmodule.so')
90 91 'zlib'
91 92 """
92 93 parts = path.replace(os.sep, '/').split('/')
93 94 parts[-1] = parts[-1].split('.', 1)[0] # remove .py and .so and .ARCH.so
94 95 if parts[-1].endswith('module'):
95 96 parts[-1] = parts[-1][:-6]
96 97 return '.'.join(parts)
97 98
98 99 def fromlocalfunc(modulename, localmods):
99 100 """Get a function to examine which locally defined module the
100 101 target source imports via a specified name.
101 102
102 103 `modulename` is a `dotted_name_of_path()`-ed source file path,
103 104 which may have `.__init__` at the end of it, of the target source.
104 105
105 106 `localmods` is a set of absolute `dotted_name_of_path()`-ed source file
106 107 paths of locally defined (= Mercurial specific) modules.
107 108
108 109 This function assumes that module names not existing in
109 110 `localmods` are from the Python standard library.
110 111
111 112 This function returns the function, which takes `name` argument,
112 113 and returns `(absname, dottedpath, hassubmod)` tuple if `name`
113 114 matches against a locally defined module. Otherwise, it returns
114 115 False.
115 116
116 117 It is assumed that `name` doesn't have `.__init__`.
117 118
118 119 `absname` is an absolute module name of specified `name`
119 120 (e.g. "hgext.convert"). This can be used to compose prefix for sub
120 121 modules or so.
121 122
122 123 `dottedpath` is a `dotted_name_of_path()`-ed source file path
123 124 (e.g. "hgext.convert.__init__") of `name`. This is used to look
124 125 the module up in `localmods` again.
125 126
126 127 `hassubmod` is whether it may have sub modules under it (for
127 128 convenience, even though this is also equivalent to "absname !=
128 129 dottedpath")
129 130
130 131 >>> localmods = {'foo.__init__', 'foo.foo1',
131 132 ... 'foo.bar.__init__', 'foo.bar.bar1',
132 133 ... 'baz.__init__', 'baz.baz1'}
133 134 >>> fromlocal = fromlocalfunc('foo.xxx', localmods)
134 135 >>> # relative
135 136 >>> fromlocal('foo1')
136 137 ('foo.foo1', 'foo.foo1', False)
137 138 >>> fromlocal('bar')
138 139 ('foo.bar', 'foo.bar.__init__', True)
139 140 >>> fromlocal('bar.bar1')
140 141 ('foo.bar.bar1', 'foo.bar.bar1', False)
141 142 >>> # absolute
142 143 >>> fromlocal('baz')
143 144 ('baz', 'baz.__init__', True)
144 145 >>> fromlocal('baz.baz1')
145 146 ('baz.baz1', 'baz.baz1', False)
146 147 >>> # unknown = maybe standard library
147 148 >>> fromlocal('os')
148 149 False
149 150 >>> fromlocal(None, 1)
150 151 ('foo', 'foo.__init__', True)
151 152 >>> fromlocal('foo1', 1)
152 153 ('foo.foo1', 'foo.foo1', False)
153 154 >>> fromlocal2 = fromlocalfunc('foo.xxx.yyy', localmods)
154 155 >>> fromlocal2(None, 2)
155 156 ('foo', 'foo.__init__', True)
156 157 >>> fromlocal2('bar2', 1)
157 158 False
158 159 >>> fromlocal2('bar', 2)
159 160 ('foo.bar', 'foo.bar.__init__', True)
160 161 """
161 162 if not isinstance(modulename, str):
162 163 modulename = modulename.decode('ascii')
163 164 prefix = '.'.join(modulename.split('.')[:-1])
164 165 if prefix:
165 166 prefix += '.'
166 167 def fromlocal(name, level=0):
167 168 # name is false value when relative imports are used.
168 169 if not name:
169 170 # If relative imports are used, level must not be absolute.
170 171 assert level > 0
171 172 candidates = ['.'.join(modulename.split('.')[:-level])]
172 173 else:
173 174 if not level:
174 175 # Check relative name first.
175 176 candidates = [prefix + name, name]
176 177 else:
177 178 candidates = ['.'.join(modulename.split('.')[:-level]) +
178 179 '.' + name]
179 180
180 181 for n in candidates:
181 182 if n in localmods:
182 183 return (n, n, False)
183 184 dottedpath = n + '.__init__'
184 185 if dottedpath in localmods:
185 186 return (n, dottedpath, True)
186 187 return False
187 188 return fromlocal
188 189
189 190 def populateextmods(localmods):
190 191 """Populate C extension modules based on pure modules"""
191 192 newlocalmods = set(localmods)
192 193 for n in localmods:
193 194 if n.startswith('mercurial.pure.'):
194 195 m = n[len('mercurial.pure.'):]
195 196 newlocalmods.add('mercurial.cext.' + m)
196 197 newlocalmods.add('mercurial.cffi._' + m)
197 198 return newlocalmods
198 199
199 200 def list_stdlib_modules():
200 201 """List the modules present in the stdlib.
201 202
202 203 >>> py3 = sys.version_info[0] >= 3
203 204 >>> mods = set(list_stdlib_modules())
204 205 >>> 'BaseHTTPServer' in mods or py3
205 206 True
206 207
207 208 os.path isn't really a module, so it's missing:
208 209
209 210 >>> 'os.path' in mods
210 211 False
211 212
212 213 sys requires special treatment, because it's baked into the
213 214 interpreter, but it should still appear:
214 215
215 216 >>> 'sys' in mods
216 217 True
217 218
218 219 >>> 'collections' in mods
219 220 True
220 221
221 222 >>> 'cStringIO' in mods or py3
222 223 True
223 224
224 225 >>> 'cffi' in mods
225 226 True
226 227 """
227 228 for m in sys.builtin_module_names:
228 229 yield m
229 230 # These modules only exist on windows, but we should always
230 231 # consider them stdlib.
231 232 for m in ['msvcrt', '_winreg']:
232 233 yield m
233 234 yield '__builtin__'
234 235 yield 'builtins' # python3 only
235 236 yield 'importlib.abc' # python3 only
236 237 yield 'importlib.machinery' # python3 only
237 238 yield 'importlib.util' # python3 only
238 239 for m in 'fcntl', 'grp', 'pwd', 'termios': # Unix only
239 240 yield m
240 241 for m in 'cPickle', 'datetime': # in Python (not C) on PyPy
241 242 yield m
242 243 for m in ['cffi']:
243 244 yield m
244 245 stdlib_prefixes = {sys.prefix, sys.exec_prefix}
245 246 # We need to supplement the list of prefixes for the search to work
246 247 # when run from within a virtualenv.
247 248 for mod in (basehttpserver, zlib):
248 249 if mod is None:
249 250 continue
250 251 try:
251 252 # Not all module objects have a __file__ attribute.
252 253 filename = mod.__file__
253 254 except AttributeError:
254 255 continue
255 256 dirname = os.path.dirname(filename)
256 257 for prefix in stdlib_prefixes:
257 258 if dirname.startswith(prefix):
258 259 # Then this directory is redundant.
259 260 break
260 261 else:
261 262 stdlib_prefixes.add(dirname)
262 263 sourceroot = os.path.abspath(os.path.dirname(os.path.dirname(__file__)))
263 264 for libpath in sys.path:
264 265 # We want to walk everything in sys.path that starts with something in
265 266 # stdlib_prefixes, but not directories from the hg sources.
266 267 if (os.path.abspath(libpath).startswith(sourceroot)
267 268 or not any(libpath.startswith(p) for p in stdlib_prefixes)):
268 269 continue
269 270 for top, dirs, files in os.walk(libpath):
270 271 for i, d in reversed(list(enumerate(dirs))):
271 272 if (not os.path.exists(os.path.join(top, d, '__init__.py'))
272 273 or top == libpath and d in ('hgdemandimport', 'hgext',
273 274 'mercurial')):
274 275 del dirs[i]
275 276 for name in files:
276 277 if not name.endswith(('.py', '.so', '.pyc', '.pyo', '.pyd')):
277 278 continue
278 279 if name.startswith('__init__.py'):
279 280 full_path = top
280 281 else:
281 282 full_path = os.path.join(top, name)
282 283 rel_path = full_path[len(libpath) + 1:]
283 284 mod = dotted_name_of_path(rel_path)
284 285 yield mod
285 286
286 287 stdlib_modules = set(list_stdlib_modules())
287 288
288 289 def imported_modules(source, modulename, f, localmods, ignore_nested=False):
289 290 """Given the source of a file as a string, yield the names
290 291 imported by that file.
291 292
292 293 Args:
293 294 source: The python source to examine as a string.
294 295 modulename: of specified python source (may have `__init__`)
295 296 localmods: set of locally defined module names (may have `__init__`)
296 297 ignore_nested: If true, import statements that do not start in
297 298 column zero will be ignored.
298 299
299 300 Returns:
300 301 A list of absolute module names imported by the given source.
301 302
302 303 >>> f = 'foo/xxx.py'
303 304 >>> modulename = 'foo.xxx'
304 305 >>> localmods = {'foo.__init__': True,
305 306 ... 'foo.foo1': True, 'foo.foo2': True,
306 307 ... 'foo.bar.__init__': True, 'foo.bar.bar1': True,
307 308 ... 'baz.__init__': True, 'baz.baz1': True }
308 309 >>> # standard library (= not locally defined ones)
309 310 >>> sorted(imported_modules(
310 311 ... 'from stdlib1 import foo, bar; import stdlib2',
311 312 ... modulename, f, localmods))
312 313 []
313 314 >>> # relative importing
314 315 >>> sorted(imported_modules(
315 316 ... 'import foo1; from bar import bar1',
316 317 ... modulename, f, localmods))
317 318 ['foo.bar.bar1', 'foo.foo1']
318 319 >>> sorted(imported_modules(
319 320 ... 'from bar.bar1 import name1, name2, name3',
320 321 ... modulename, f, localmods))
321 322 ['foo.bar.bar1']
322 323 >>> # absolute importing
323 324 >>> sorted(imported_modules(
324 325 ... 'from baz import baz1, name1',
325 326 ... modulename, f, localmods))
326 327 ['baz.__init__', 'baz.baz1']
327 328 >>> # mixed importing, even though it shouldn't be recommended
328 329 >>> sorted(imported_modules(
329 330 ... 'import stdlib, foo1, baz',
330 331 ... modulename, f, localmods))
331 332 ['baz.__init__', 'foo.foo1']
332 333 >>> # ignore_nested
333 334 >>> sorted(imported_modules(
334 335 ... '''import foo
335 336 ... def wat():
336 337 ... import bar
337 338 ... ''', modulename, f, localmods))
338 339 ['foo.__init__', 'foo.bar.__init__']
339 340 >>> sorted(imported_modules(
340 341 ... '''import foo
341 342 ... def wat():
342 343 ... import bar
343 344 ... ''', modulename, f, localmods, ignore_nested=True))
344 345 ['foo.__init__']
345 346 """
346 347 fromlocal = fromlocalfunc(modulename, localmods)
347 348 for node in ast.walk(ast.parse(source, f)):
348 349 if ignore_nested and getattr(node, 'col_offset', 0) > 0:
349 350 continue
350 351 if isinstance(node, ast.Import):
351 352 for n in node.names:
352 353 found = fromlocal(n.name)
353 354 if not found:
354 355 # this should import standard library
355 356 continue
356 357 yield found[1]
357 358 elif isinstance(node, ast.ImportFrom):
358 359 found = fromlocal(node.module, node.level)
359 360 if not found:
360 361 # this should import standard library
361 362 continue
362 363
363 364 absname, dottedpath, hassubmod = found
364 365 if not hassubmod:
365 366 # "dottedpath" is not a package; must be imported
366 367 yield dottedpath
367 368 # examination of "node.names" should be redundant
368 369 # e.g.: from mercurial.node import nullid, nullrev
369 370 continue
370 371
371 372 modnotfound = False
372 373 prefix = absname + '.'
373 374 for n in node.names:
374 375 found = fromlocal(prefix + n.name)
375 376 if not found:
376 377 # this should be a function or a property of "node.module"
377 378 modnotfound = True
378 379 continue
379 380 yield found[1]
380 381 if modnotfound:
381 382 # "dottedpath" is a package, but imported because of non-module
382 383 # lookup
383 384 yield dottedpath
384 385
385 386 def verify_import_convention(module, source, localmods):
386 387 """Verify imports match our established coding convention.
387 388
388 389 We have 2 conventions: legacy and modern. The modern convention is in
389 390 effect when using absolute imports.
390 391
391 392 The legacy convention only looks for mixed imports. The modern convention
392 393 is much more thorough.
393 394 """
394 395 root = ast.parse(source)
395 396 absolute = usingabsolute(root)
396 397
397 398 if absolute:
398 399 return verify_modern_convention(module, root, localmods)
399 400 else:
400 401 return verify_stdlib_on_own_line(root)
401 402
402 403 def verify_modern_convention(module, root, localmods, root_col_offset=0):
403 404 """Verify a file conforms to the modern import convention rules.
404 405
405 406 The rules of the modern convention are:
406 407
407 408 * Ordering is stdlib followed by local imports. Each group is lexically
408 409 sorted.
409 410 * Importing multiple modules via "import X, Y" is not allowed: use
410 411 separate import statements.
411 412 * Importing multiple modules via "from X import ..." is allowed if using
412 413 parenthesis and one entry per line.
413 414 * Only 1 relative import statement per import level ("from .", "from ..")
414 415 is allowed.
415 416 * Relative imports from higher levels must occur before lower levels. e.g.
416 417 "from .." must be before "from .".
417 418 * Imports from peer packages should use relative import (e.g. do not
418 419 "import mercurial.foo" from a "mercurial.*" module).
419 420 * Symbols can only be imported from specific modules (see
420 421 `allowsymbolimports`). For other modules, first import the module then
421 422 assign the symbol to a module-level variable. In addition, these imports
422 423 must be performed before other local imports. This rule only
423 424 applies to import statements outside of any blocks.
424 425 * Relative imports from the standard library are not allowed, unless that
425 426 library is also a local module.
426 427 * Certain modules must be aliased to alternate names to avoid aliasing
427 428 and readability problems. See `requirealias`.
428 429 """
429 430 if not isinstance(module, str):
430 431 module = module.decode('ascii')
431 432 topmodule = module.split('.')[0]
432 433 fromlocal = fromlocalfunc(module, localmods)
433 434
434 435 # Whether a local/non-stdlib import has been performed.
435 436 seenlocal = None
436 437 # Whether a local/non-stdlib, non-symbol import has been seen.
437 438 seennonsymbollocal = False
438 439 # The last name to be imported (for sorting).
439 440 lastname = None
440 441 laststdlib = None
441 442 # Relative import levels encountered so far.
442 443 seenlevels = set()
443 444
444 445 for node, newscope in walklocal(root):
445 446 def msg(fmt, *args):
446 447 return (fmt % args, node.lineno)
447 448 if newscope:
448 449 # Check for local imports in function
449 450 for r in verify_modern_convention(module, node, localmods,
450 451 node.col_offset + 4):
451 452 yield r
452 453 elif isinstance(node, ast.Import):
453 454 # Disallow "import foo, bar" and require separate imports
454 455 # for each module.
455 456 if len(node.names) > 1:
456 457 yield msg('multiple imported names: %s',
457 458 ', '.join(n.name for n in node.names))
458 459
459 460 name = node.names[0].name
460 461 asname = node.names[0].asname
461 462
462 463 stdlib = name in stdlib_modules
463 464
464 465 # Ignore sorting rules on imports inside blocks.
465 466 if node.col_offset == root_col_offset:
466 467 if lastname and name < lastname and laststdlib == stdlib:
467 468 yield msg('imports not lexically sorted: %s < %s',
468 469 name, lastname)
469 470
470 471 lastname = name
471 472 laststdlib = stdlib
472 473
473 474 # stdlib imports should be before local imports.
474 475 if stdlib and seenlocal and node.col_offset == root_col_offset:
475 476 yield msg('stdlib import "%s" follows local import: %s',
476 477 name, seenlocal)
477 478
478 479 if not stdlib:
479 480 seenlocal = name
480 481
481 482 # Import of sibling modules should use relative imports.
482 483 topname = name.split('.')[0]
483 484 if topname == topmodule:
484 485 yield msg('import should be relative: %s', name)
485 486
486 487 if name in requirealias and asname != requirealias[name]:
487 488 yield msg('%s module must be "as" aliased to %s',
488 489 name, requirealias[name])
489 490
490 491 elif isinstance(node, ast.ImportFrom):
491 492 # Resolve the full imported module name.
492 493 if node.level > 0:
493 494 fullname = '.'.join(module.split('.')[:-node.level])
494 495 if node.module:
495 496 fullname += '.%s' % node.module
496 497 else:
497 498 assert node.module
498 499 fullname = node.module
499 500
500 501 topname = fullname.split('.')[0]
501 502 if topname == topmodule:
502 503 yield msg('import should be relative: %s', fullname)
503 504
504 505 # __future__ is special since it needs to come first and use
505 506 # symbol import.
506 507 if fullname != '__future__':
507 508 if not fullname or (
508 509 fullname in stdlib_modules
509 510 and fullname not in localmods
510 511 and fullname + '.__init__' not in localmods):
511 512 yield msg('relative import of stdlib module')
512 513 else:
513 514 seenlocal = fullname
514 515
515 516 # Direct symbol import is only allowed from certain modules and
516 517 # must occur before non-symbol imports.
517 518 found = fromlocal(node.module, node.level)
518 519 if found and found[2]: # node.module is a package
519 520 prefix = found[0] + '.'
520 521 symbols = (n.name for n in node.names
521 522 if not fromlocal(prefix + n.name))
522 523 else:
523 524 symbols = (n.name for n in node.names)
524 525 symbols = [sym for sym in symbols if sym not in directsymbols]
525 526 if node.module and node.col_offset == root_col_offset:
526 527 if symbols and fullname not in allowsymbolimports:
527 528 yield msg('direct symbol import %s from %s',
528 529 ', '.join(symbols), fullname)
529 530
530 531 if symbols and seennonsymbollocal:
531 532 yield msg('symbol import follows non-symbol import: %s',
532 533 fullname)
533 534 if not symbols and fullname not in stdlib_modules:
534 535 seennonsymbollocal = True
535 536
536 537 if not node.module:
537 538 assert node.level
538 539
539 540 # Only allow 1 group per level.
540 541 if (node.level in seenlevels
541 542 and node.col_offset == root_col_offset):
542 543 yield msg('multiple "from %s import" statements',
543 544 '.' * node.level)
544 545
545 546 # Higher-level groups come before lower-level groups.
546 547 if any(node.level > l for l in seenlevels):
547 548 yield msg('higher-level import should come first: %s',
548 549 fullname)
549 550
550 551 seenlevels.add(node.level)
551 552
552 553 # Entries in "from .X import ( ... )" lists must be lexically
553 554 # sorted.
554 555 lastentryname = None
555 556
556 557 for n in node.names:
557 558 if lastentryname and n.name < lastentryname:
558 559 yield msg('imports from %s not lexically sorted: %s < %s',
559 560 fullname, n.name, lastentryname)
560 561
561 562 lastentryname = n.name
562 563
563 564 if n.name in requirealias and n.asname != requirealias[n.name]:
564 565 yield msg('%s from %s must be "as" aliased to %s',
565 566 n.name, fullname, requirealias[n.name])
566 567
567 568 def verify_stdlib_on_own_line(root):
568 569 """Given some python source, verify that stdlib imports are done
569 570 in separate statements from relative local module imports.
570 571
571 572 >>> list(verify_stdlib_on_own_line(ast.parse('import sys, foo')))
572 573 [('mixed imports\\n stdlib: sys\\n relative: foo', 1)]
573 574 >>> list(verify_stdlib_on_own_line(ast.parse('import sys, os')))
574 575 []
575 576 >>> list(verify_stdlib_on_own_line(ast.parse('import foo, bar')))
576 577 []
577 578 """
578 579 for node in ast.walk(root):
579 580 if isinstance(node, ast.Import):
580 581 from_stdlib = {False: [], True: []}
581 582 for n in node.names:
582 583 from_stdlib[n.name in stdlib_modules].append(n.name)
583 584 if from_stdlib[True] and from_stdlib[False]:
584 585 yield ('mixed imports\n stdlib: %s\n relative: %s' %
585 586 (', '.join(sorted(from_stdlib[True])),
586 587 ', '.join(sorted(from_stdlib[False]))), node.lineno)
587 588
588 589 class CircularImport(Exception):
589 590 pass
590 591
591 592 def checkmod(mod, imports):
592 593 shortest = {}
593 594 visit = [[mod]]
594 595 while visit:
595 596 path = visit.pop(0)
596 597 for i in sorted(imports.get(path[-1], [])):
597 598 if len(path) < shortest.get(i, 1000):
598 599 shortest[i] = len(path)
599 600 if i in path:
600 601 if i == path[0]:
601 602 raise CircularImport(path)
602 603 continue
603 604 visit.append(path + [i])
604 605
605 606 def rotatecycle(cycle):
606 607 """arrange a cycle so that the lexicographically first module is listed first
607 608
608 609 >>> rotatecycle(['foo', 'bar'])
609 610 ['bar', 'foo', 'bar']
610 611 """
611 612 lowest = min(cycle)
612 613 idx = cycle.index(lowest)
613 614 return cycle[idx:] + cycle[:idx] + [lowest]
614 615
615 616 def find_cycles(imports):
616 617 """Find cycles in an already-loaded import graph.
617 618
618 619 All module names recorded in `imports` should be absolute ones.
619 620
620 621 >>> from __future__ import print_function
621 622 >>> imports = {'top.foo': ['top.bar', 'os.path', 'top.qux'],
622 623 ... 'top.bar': ['top.baz', 'sys'],
623 624 ... 'top.baz': ['top.foo'],
624 625 ... 'top.qux': ['top.foo']}
625 626 >>> print('\\n'.join(sorted(find_cycles(imports))))
626 627 top.bar -> top.baz -> top.foo -> top.bar
627 628 top.foo -> top.qux -> top.foo
628 629 """
629 630 cycles = set()
630 631 for mod in sorted(imports.keys()):
631 632 try:
632 633 checkmod(mod, imports)
633 634 except CircularImport as e:
634 635 cycle = e.args[0]
635 636 cycles.add(" -> ".join(rotatecycle(cycle)))
636 637 return cycles
637 638
638 639 def _cycle_sortkey(c):
639 640 return len(c), c
640 641
641 642 def embedded(f, modname, src):
642 643 """Extract embedded python code
643 644
644 645 >>> def _forcestr(thing):
645 646 ... if not isinstance(thing, str):
646 647 ... return thing.decode('ascii')
647 648 ... return thing
648 649 >>> def test(fn, lines):
649 650 ... for s, m, f, l in embedded(fn, b"example", lines):
650 651 ... print("%s %s %d" % (_forcestr(m), _forcestr(f), l))
651 652 ... print(repr(_forcestr(s)))
652 653 >>> lines = [
653 654 ... 'comment',
654 655 ... ' >>> from __future__ import print_function',
655 656 ... " >>> ' multiline",
656 657 ... " ... string'",
657 658 ... ' ',
658 659 ... 'comment',
659 660 ... ' $ cat > foo.py <<EOF',
660 661 ... ' > from __future__ import print_function',
661 662 ... ' > EOF',
662 663 ... ]
663 664 >>> test(b"example.t", lines)
664 665 example[2] doctest.py 1
665 666 "from __future__ import print_function\\n' multiline\\nstring'\\n\\n"
666 667 example[8] foo.py 7
667 668 'from __future__ import print_function\\n'
668 669 """
669 670 errors = []
670 671 for name, starts, ends, code in testparseutil.pyembedded(f, src, errors):
671 672 if not name:
672 673 # use 'doctest.py', in order to make already existing
673 674 # doctest above pass instantly
674 675 name = 'doctest.py'
675 676 # "starts" is "line number" (1-origin), but embedded() is
676 677 # expected to return "line offset" (0-origin). Therefore, this
677 678 # yields "starts - 1".
678 679 if not isinstance(modname, str):
679 680 modname = modname.decode('utf8')
680 681 yield code, "%s[%d]" % (modname, starts), name, starts - 1
681 682
682 683 def sources(f, modname):
683 684 """Yields possibly multiple sources from a filepath
684 685
685 686 input: filepath, modulename
686 687 yields: script(string), modulename, filepath, linenumber
687 688
688 689 For embedded scripts, the modulename and filepath will be different
689 690 from the function arguments. linenumber is an offset relative to
690 691 the input file.
691 692 """
692 693 py = False
693 694 if not f.endswith('.t'):
694 695 with open(f, 'rb') as src:
695 696 yield src.read(), modname, f, 0
696 697 py = True
697 698 if py or f.endswith('.t'):
698 699 with open(f, 'r') as src:
699 700 for script, modname, t, line in embedded(f, modname, src):
700 701 yield script, modname.encode('utf8'), t, line
701 702
702 703 def main(argv):
703 704 if len(argv) < 2 or (argv[1] == '-' and len(argv) > 2):
703 704 print('Usage: %s {-|file [file] [file] ...}' % argv[0])
705 706 return 1
706 707 if argv[1] == '-':
707 708 argv = argv[:1]
708 709 argv.extend(l.rstrip() for l in sys.stdin.readlines())
709 710 localmodpaths = {}
710 711 used_imports = {}
711 712 any_errors = False
712 713 for source_path in argv[1:]:
713 714 modname = dotted_name_of_path(source_path)
714 715 localmodpaths[modname] = source_path
715 716 localmods = populateextmods(localmodpaths)
716 717 for localmodname, source_path in sorted(localmodpaths.items()):
717 718 if not isinstance(localmodname, bytes):
718 719 # This is only safe because all hg's files are ascii
719 720 localmodname = localmodname.encode('ascii')
720 721 for src, modname, name, line in sources(source_path, localmodname):
721 722 try:
722 723 used_imports[modname] = sorted(
723 724 imported_modules(src, modname, name, localmods,
724 725 ignore_nested=True))
725 726 for error, lineno in verify_import_convention(modname, src,
726 727 localmods):
727 728 any_errors = True
728 729 print('%s:%d: %s' % (source_path, lineno + line, error))
729 730 except SyntaxError as e:
730 731 print('%s:%d: SyntaxError: %s' %
731 732 (source_path, e.lineno + line, e))
732 733 cycles = find_cycles(used_imports)
733 734 if cycles:
734 735 firstmods = set()
735 736 for c in sorted(cycles, key=_cycle_sortkey):
736 737 first = c.split()[0]
737 738 # As a rough cut, ignore any cycle that starts with the
738 739 # same module as some other cycle. Otherwise we see lots
739 740 # of cycles that are effectively duplicates.
740 741 if first in firstmods:
741 742 continue
742 743 print('Import cycle:', c)
743 744 firstmods.add(first)
744 745 any_errors = True
745 746 return any_errors != 0
746 747
747 748 if __name__ == '__main__':
748 749 sys.exit(int(main(sys.argv)))
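The file above appears to be Mercurial's import checker (contrib/import-checker.py); the only change in this hunk is the addition of 'mercurial.interfaces' to `allowsymbolimports`, so that names may be imported directly from the new package. As a rough, hypothetical illustration of how the helpers behave, assuming they are available in the current namespace (for instance by appending this to the script; the module names below are illustrative, not part of the change):

    # Local modules as the checker would see them after this change.
    localmods = {'hgext.__init__', 'hgext.lfs.__init__', 'hgext.lfs.wrapper',
                 'mercurial.__init__',
                 'mercurial.interfaces.__init__',
                 'mercurial.interfaces.repository'}

    # fromlocalfunc() resolves a dotted name against the local modules.
    fromlocal = fromlocalfunc('hgext.lfs.wrapper', localmods)
    print(fromlocal('mercurial.interfaces.repository'))
    # ('mercurial.interfaces.repository', 'mercurial.interfaces.repository', False)

    # A module header in the style of the hunks below satisfies the modern
    # import convention, so this loop prints nothing.
    source = ('from __future__ import absolute_import\n'
              'from mercurial.interfaces import (\n'
              '    repository,\n'
              ')\n')
    for violation, lineno in verify_import_convention('hgext.lfs.wrapper',
                                                      source, localmods):
        print(lineno, violation)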
@@ -1,383 +1,386
1 1 # lfs - hash-preserving large file support using Git-LFS protocol
2 2 #
3 3 # Copyright 2017 Facebook, Inc.
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """lfs - large file support (EXPERIMENTAL)
9 9
10 10 This extension allows large files to be tracked outside of the normal
11 11 repository storage and stored on a centralized server, similar to the
12 12 ``largefiles`` extension. The ``git-lfs`` protocol is used when
13 13 communicating with the server, so existing git infrastructure can be
14 14 harnessed. Even though the files are stored outside of the repository,
15 15 they are still integrity checked in the same manner as normal files.
16 16
17 17 The files stored outside of the repository are downloaded on demand,
18 18 which reduces the time to clone, and possibly the local disk usage.
19 19 This changes fundamental workflows in a DVCS, so careful thought
20 20 should be given before deploying it. :hg:`convert` can be used to
21 21 convert LFS repositories to normal repositories that no longer
22 22 require this extension, and do so without changing the commit hashes.
23 23 This allows the extension to be disabled if the centralized workflow
24 24 becomes burdensome. However, the pre and post convert clones will
25 25 not be able to communicate with each other unless the extension is
26 26 enabled on both.
27 27
28 28 To start a new repository, or to add LFS files to an existing one, just
29 29 create an ``.hglfs`` file as described below in the root directory of
30 30 the repository. Typically, this file should be put under version
31 31 control, so that the settings will propagate to other repositories with
32 32 push and pull. During any commit, Mercurial will consult this file to
33 33 determine if an added or modified file should be stored externally. The
34 34 type of storage depends on the characteristics of the file at each
35 35 commit. A file that is near a size threshold may switch back and forth
36 36 between LFS and normal storage, as needed.
37 37
38 38 Alternately, both normal repositories and largefile controlled
39 39 repositories can be converted to LFS by using :hg:`convert` and the
40 40 ``lfs.track`` config option described below. The ``.hglfs`` file
41 41 should then be created and added, to control subsequent LFS selection.
42 42 The hashes are also unchanged in this case. The LFS and non-LFS
43 43 repositories can be distinguished because the LFS repository will
44 44 abort any command if this extension is disabled.
45 45
46 46 Committed LFS files are held locally, until the repository is pushed.
47 47 Prior to pushing the normal repository data, the LFS files that are
48 48 tracked by the outgoing commits are automatically uploaded to the
49 49 configured central server. No LFS files are transferred on
50 50 :hg:`pull` or :hg:`clone`. Instead, the files are downloaded on
51 51 demand as they need to be read, if a cached copy cannot be found
52 52 locally. Both committing and downloading an LFS file will link the
53 53 file to a usercache, to speed up future access. See the `usercache`
54 54 config setting described below.
55 55
56 56 .hglfs::
57 57
58 58 The extension reads its configuration from a versioned ``.hglfs``
59 59 configuration file found in the root of the working directory. The
60 60 ``.hglfs`` file uses the same syntax as all other Mercurial
61 61 configuration files. It uses a single section, ``[track]``.
62 62
63 63 The ``[track]`` section specifies which files are stored as LFS (or
64 64 not). Each line is keyed by a file pattern, with a predicate value.
65 65 The first file pattern match is used, so put more specific patterns
66 66 first. The available predicates are ``all()``, ``none()``, and
67 67 ``size()``. See "hg help filesets.size" for the latter.
68 68
69 69 Example versioned ``.hglfs`` file::
70 70
71 71 [track]
72 72 # No Makefile or python file, anywhere, will be LFS
73 73 **Makefile = none()
74 74 **.py = none()
75 75
76 76 **.zip = all()
77 77 **.exe = size(">1MB")
78 78
79 79 # Catchall for everything not matched above
80 80 ** = size(">10MB")
81 81
82 82 Configs::
83 83
84 84 [lfs]
85 85 # Remote endpoint. Multiple protocols are supported:
86 86 # - http(s)://user:pass@example.com/path
87 87 # git-lfs endpoint
88 88 # - file:///tmp/path
89 89 # local filesystem, usually for testing
90 90 # if unset, lfs will assume the remote repository also handles blob storage
91 91 # for http(s) URLs. Otherwise, lfs will prompt to set this when it must
92 92 # use this value.
93 93 # (default: unset)
94 94 url = https://example.com/repo.git/info/lfs
95 95
96 96 # Which files to track in LFS. Path tests are "**.extname" for file
97 97 # extensions, and "path:under/some/directory" for path prefix. Both
98 98 # are relative to the repository root.
99 99 # File size can be tested with the "size()" fileset, and tests can be
100 100 # joined with fileset operators. (See "hg help filesets.operators".)
101 101 #
102 102 # Some examples:
103 103 # - all() # everything
104 104 # - none() # nothing
105 105 # - size(">20MB") # larger than 20MB
106 106 # - !**.txt # anything not a *.txt file
107 107 # - **.zip | **.tar.gz | **.7z # some types of compressed files
108 108 # - path:bin # files under "bin" in the project root
109 109 # - (**.php & size(">2MB")) | (**.js & size(">5MB")) | **.tar.gz
110 110 # | (path:bin & !path:/bin/README) | size(">1GB")
111 111 # (default: none())
112 112 #
113 113 # This is ignored if there is a tracked '.hglfs' file, and this setting
114 114 # will eventually be deprecated and removed.
115 115 track = size(">10M")
116 116
117 117 # how many times to retry before giving up on transferring an object
118 118 retry = 5
119 119
120 120 # the local directory to store lfs files for sharing across local clones.
121 121 # If not set, the cache is located in an OS specific cache location.
122 122 usercache = /path/to/global/cache
123 123 """
124 124
125 125 from __future__ import absolute_import
126 126
127 127 import sys
128 128
129 129 from mercurial.i18n import _
130 130
131 131 from mercurial import (
132 132 config,
133 133 context,
134 134 error,
135 135 exchange,
136 136 extensions,
137 137 exthelper,
138 138 filelog,
139 139 filesetlang,
140 140 localrepo,
141 141 minifileset,
142 142 node,
143 143 pycompat,
144 repository,
145 144 revlog,
146 145 scmutil,
147 146 templateutil,
148 147 util,
149 148 )
150 149
150 from mercurial.interfaces import (
151 repository,
152 )
153
151 154 from . import (
152 155 blobstore,
153 156 wireprotolfsserver,
154 157 wrapper,
155 158 )
156 159
157 160 # Note for extension authors: ONLY specify testedwith = 'ships-with-hg-core' for
158 161 # extensions which SHIP WITH MERCURIAL. Non-mainline extensions should
159 162 # be specifying the version(s) of Mercurial they are tested with, or
160 163 # leave the attribute unspecified.
161 164 testedwith = 'ships-with-hg-core'
162 165
163 166 eh = exthelper.exthelper()
164 167 eh.merge(wrapper.eh)
165 168 eh.merge(wireprotolfsserver.eh)
166 169
167 170 cmdtable = eh.cmdtable
168 171 configtable = eh.configtable
169 172 extsetup = eh.finalextsetup
170 173 uisetup = eh.finaluisetup
171 174 filesetpredicate = eh.filesetpredicate
172 175 reposetup = eh.finalreposetup
173 176 templatekeyword = eh.templatekeyword
174 177
175 178 eh.configitem('experimental', 'lfs.serve',
176 179 default=True,
177 180 )
178 181 eh.configitem('experimental', 'lfs.user-agent',
179 182 default=None,
180 183 )
181 184 eh.configitem('experimental', 'lfs.disableusercache',
182 185 default=False,
183 186 )
184 187 eh.configitem('experimental', 'lfs.worker-enable',
185 188 default=False,
186 189 )
187 190
188 191 eh.configitem('lfs', 'url',
189 192 default=None,
190 193 )
191 194 eh.configitem('lfs', 'usercache',
192 195 default=None,
193 196 )
194 197 # Deprecated
195 198 eh.configitem('lfs', 'threshold',
196 199 default=None,
197 200 )
198 201 eh.configitem('lfs', 'track',
199 202 default='none()',
200 203 )
201 204 eh.configitem('lfs', 'retry',
202 205 default=5,
203 206 )
204 207
205 208 lfsprocessor = (
206 209 wrapper.readfromstore,
207 210 wrapper.writetostore,
208 211 wrapper.bypasscheckhash,
209 212 )
210 213
211 214 def featuresetup(ui, supported):
212 215 # don't die on seeing a repo with the lfs requirement
213 216 supported |= {'lfs'}
214 217
215 218 @eh.uisetup
216 219 def _uisetup(ui):
217 220 localrepo.featuresetupfuncs.add(featuresetup)
218 221
219 222 @eh.reposetup
220 223 def _reposetup(ui, repo):
221 224 # Nothing to do with a remote repo
222 225 if not repo.local():
223 226 return
224 227
225 228 repo.svfs.lfslocalblobstore = blobstore.local(repo)
226 229 repo.svfs.lfsremoteblobstore = blobstore.remote(repo)
227 230
228 231 class lfsrepo(repo.__class__):
229 232 @localrepo.unfilteredmethod
230 233 def commitctx(self, ctx, error=False, origctx=None):
231 234 repo.svfs.options['lfstrack'] = _trackedmatcher(self)
232 235 return super(lfsrepo, self).commitctx(ctx, error, origctx=origctx)
233 236
234 237 repo.__class__ = lfsrepo
235 238
236 239 if 'lfs' not in repo.requirements:
237 240 def checkrequireslfs(ui, repo, **kwargs):
238 241 if 'lfs' in repo.requirements:
239 242 return 0
240 243
241 244 last = kwargs.get(r'node_last')
242 245 _bin = node.bin
243 246 if last:
244 247 s = repo.set('%n:%n', _bin(kwargs[r'node']), _bin(last))
245 248 else:
246 249 s = repo.set('%n', _bin(kwargs[r'node']))
247 250 match = repo._storenarrowmatch
248 251 for ctx in s:
249 252 # TODO: is there a way to just walk the files in the commit?
250 253 if any(ctx[f].islfs() for f in ctx.files()
251 254 if f in ctx and match(f)):
252 255 repo.requirements.add('lfs')
253 256 repo.features.add(repository.REPO_FEATURE_LFS)
254 257 repo._writerequirements()
255 258 repo.prepushoutgoinghooks.add('lfs', wrapper.prepush)
256 259 break
257 260
258 261 ui.setconfig('hooks', 'commit.lfs', checkrequireslfs, 'lfs')
259 262 ui.setconfig('hooks', 'pretxnchangegroup.lfs', checkrequireslfs, 'lfs')
260 263 else:
261 264 repo.prepushoutgoinghooks.add('lfs', wrapper.prepush)
262 265
263 266 def _trackedmatcher(repo):
264 267 """Return a function (path, size) -> bool indicating whether or not to
265 268 track a given file with lfs."""
266 269 if not repo.wvfs.exists('.hglfs'):
267 270 # No '.hglfs' in wdir. Fallback to config for now.
268 271 trackspec = repo.ui.config('lfs', 'track')
269 272
270 273 # deprecated config: lfs.threshold
271 274 threshold = repo.ui.configbytes('lfs', 'threshold')
272 275 if threshold:
273 276 filesetlang.parse(trackspec) # make sure syntax errors are confined
274 277 trackspec = "(%s) | size('>%d')" % (trackspec, threshold)
275 278
276 279 return minifileset.compile(trackspec)
277 280
278 281 data = repo.wvfs.tryread('.hglfs')
279 282 if not data:
280 283 return lambda p, s: False
281 284
282 285 # Parse errors here will abort with a message that points to the .hglfs file
283 286 # and line number.
284 287 cfg = config.config()
285 288 cfg.parse('.hglfs', data)
286 289
287 290 try:
288 291 rules = [(minifileset.compile(pattern), minifileset.compile(rule))
289 292 for pattern, rule in cfg.items('track')]
290 293 except error.ParseError as e:
291 294 # The original exception gives no indicator that the error is in the
292 295 # .hglfs file, so add that.
293 296
294 297 # TODO: See if the line number of the file can be made available.
295 298 raise error.Abort(_('parse error in .hglfs: %s') % e)
296 299
297 300 def _match(path, size):
298 301 for pat, rule in rules:
299 302 if pat(path, size):
300 303 return rule(path, size)
301 304
302 305 return False
303 306
304 307 return _match
305 308
306 309 # Called by remotefilelog
307 310 def wrapfilelog(filelog):
308 311 wrapfunction = extensions.wrapfunction
309 312
310 313 wrapfunction(filelog, 'addrevision', wrapper.filelogaddrevision)
311 314 wrapfunction(filelog, 'renamed', wrapper.filelogrenamed)
312 315 wrapfunction(filelog, 'size', wrapper.filelogsize)
313 316
314 317 @eh.wrapfunction(localrepo, 'resolverevlogstorevfsoptions')
315 318 def _resolverevlogstorevfsoptions(orig, ui, requirements, features):
316 319 opts = orig(ui, requirements, features)
317 320 for name, module in extensions.extensions(ui):
318 321 if module is sys.modules[__name__]:
319 322 if revlog.REVIDX_EXTSTORED in opts[b'flagprocessors']:
320 323 msg = (_(b"cannot register multiple processors on flag '%#x'.")
321 324 % revlog.REVIDX_EXTSTORED)
322 325 raise error.Abort(msg)
323 326
324 327 opts[b'flagprocessors'][revlog.REVIDX_EXTSTORED] = lfsprocessor
325 328 break
326 329
327 330 return opts
328 331
329 332 @eh.extsetup
330 333 def _extsetup(ui):
331 334 wrapfilelog(filelog.filelog)
332 335
333 336 context.basefilectx.islfs = wrapper.filectxislfs
334 337
335 338 scmutil.fileprefetchhooks.add('lfs', wrapper._prefetchfiles)
336 339
337 340 # Make bundle choose changegroup3 instead of changegroup2. This affects
338 341 # "hg bundle" command. Note: it does not cover all bundle formats like
339 342 # "packed1". Using "packed1" with lfs will likely cause trouble.
340 343 exchange._bundlespeccontentopts["v2"]["cg.version"] = "03"
341 344
342 345 @eh.filesetpredicate('lfs()')
343 346 def lfsfileset(mctx, x):
344 347 """File that uses LFS storage."""
345 348 # i18n: "lfs" is a keyword
346 349 filesetlang.getargs(x, 0, 0, _("lfs takes no arguments"))
347 350 ctx = mctx.ctx
348 351 def lfsfilep(f):
349 352 return wrapper.pointerfromctx(ctx, f, removed=True) is not None
350 353 return mctx.predicate(lfsfilep, predrepr='<lfs>')
351 354
352 355 @eh.templatekeyword('lfs_files', requires={'ctx'})
353 356 def lfsfiles(context, mapping):
354 357 """List of strings. All files modified, added, or removed by this
355 358 changeset."""
356 359 ctx = context.resource(mapping, 'ctx')
357 360
358 361 pointers = wrapper.pointersfromctx(ctx, removed=True) # {path: pointer}
359 362 files = sorted(pointers.keys())
360 363
361 364 def pointer(v):
362 365 # In the file spec, version is first and the other keys are sorted.
363 366 sortkeyfunc = lambda x: (x[0] != 'version', x)
364 367 items = sorted(pointers[v].iteritems(), key=sortkeyfunc)
365 368 return util.sortdict(items)
366 369
367 370 makemap = lambda v: {
368 371 'file': v,
369 372 'lfsoid': pointers[v].oid() if pointers[v] else None,
370 373 'lfspointer': templateutil.hybriddict(pointer(v)),
371 374 }
372 375
373 376 # TODO: make the separator ', '?
374 377 f = templateutil._showcompatlist(context, mapping, 'lfs_file', files)
375 378 return templateutil.hybrid(f, files, makemap, pycompat.identity)
376 379
377 380 @eh.command('debuglfsupload',
378 381 [('r', 'rev', [], _('upload large files introduced by REV'))])
379 382 def debuglfsupload(ui, repo, **opts):
380 383 """upload lfs blobs added by the working copy parent or given revisions"""
381 384 revs = opts.get(r'rev', [])
382 385 pointers = wrapper.extractpointers(repo, scmutil.revrange(repo, revs))
383 386 wrapper.uploadblobs(repo, pointers)
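Aside from the import relocation, the extension above (apparently hgext/lfs/__init__.py) is unchanged. For readers skimming the hunk, all of its registrations funnel through `exthelper`; a stripped-down, hypothetical skeleton of that pattern is sketched below (the extension and config names are made up for illustration; only the `exthelper` calls mirror the code above):

    from __future__ import absolute_import

    from mercurial import (
        exthelper,
    )

    eh = exthelper.exthelper()

    # Mercurial discovers these module-level attributes; the helper
    # populates them from the registrations below.
    cmdtable = eh.cmdtable
    configtable = eh.configtable
    uisetup = eh.finaluisetup
    reposetup = eh.finalreposetup

    # Declare a config default instead of registering it by hand.
    eh.configitem('myext', 'enabled', default=False)

    @eh.reposetup
    def _reposetup(ui, repo):
        # reposetup hooks run for each repository object Mercurial creates.
        if repo.local():
            ui.debug('myext: repository setup\n')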
@@ -1,446 +1,449
1 1 # wrapper.py - methods wrapping core mercurial logic
2 2 #
3 3 # Copyright 2017 Facebook, Inc.
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import hashlib
11 11
12 12 from mercurial.i18n import _
13 13 from mercurial.node import bin, hex, nullid, short
14 14
15 15 from mercurial import (
16 16 bundle2,
17 17 changegroup,
18 18 cmdutil,
19 19 context,
20 20 error,
21 21 exchange,
22 22 exthelper,
23 23 localrepo,
24 repository,
25 24 revlog,
26 25 scmutil,
27 26 upgrade,
28 27 util,
29 28 vfs as vfsmod,
30 29 wireprotov1server,
31 30 )
32 31
32 from mercurial.interfaces import (
33 repository,
34 )
35
33 36 from mercurial.utils import (
34 37 storageutil,
35 38 stringutil,
36 39 )
37 40
38 41 from ..largefiles import lfutil
39 42
40 43 from . import (
41 44 blobstore,
42 45 pointer,
43 46 )
44 47
45 48 eh = exthelper.exthelper()
46 49
47 50 @eh.wrapfunction(localrepo, 'makefilestorage')
48 51 def localrepomakefilestorage(orig, requirements, features, **kwargs):
49 52 if b'lfs' in requirements:
50 53 features.add(repository.REPO_FEATURE_LFS)
51 54
52 55 return orig(requirements=requirements, features=features, **kwargs)
53 56
54 57 @eh.wrapfunction(changegroup, 'allsupportedversions')
55 58 def allsupportedversions(orig, ui):
56 59 versions = orig(ui)
57 60 versions.add('03')
58 61 return versions
59 62
60 63 @eh.wrapfunction(wireprotov1server, '_capabilities')
61 64 def _capabilities(orig, repo, proto):
62 65 '''Wrap server command to announce lfs server capability'''
63 66 caps = orig(repo, proto)
64 67 if util.safehasattr(repo.svfs, 'lfslocalblobstore'):
65 68 # Advertise a slightly different capability when lfs is *required*, so
66 69 # that the client knows it MUST load the extension. If lfs is not
67 70 # required on the server, there's no reason to autoload the extension
68 71 # on the client.
69 72 if b'lfs' in repo.requirements:
70 73 caps.append('lfs-serve')
71 74
72 75 caps.append('lfs')
73 76 return caps
74 77
75 78 def bypasscheckhash(self, text):
76 79 return False
77 80
78 81 def readfromstore(self, text):
79 82 """Read filelog content from local blobstore transform for flagprocessor.
80 83
81 84 Default transform for flagprocessor, returning contents from blobstore.
82 85 Returns a 2-tuple (text, validatehash) where validatehash is True as the
83 86 contents of the blobstore should be checked using checkhash.
84 87 """
85 88 p = pointer.deserialize(text)
86 89 oid = p.oid()
87 90 store = self.opener.lfslocalblobstore
88 91 if not store.has(oid):
89 92 p.filename = self.filename
90 93 self.opener.lfsremoteblobstore.readbatch([p], store)
91 94
92 95 # The caller will validate the content
93 96 text = store.read(oid, verify=False)
94 97
95 98 # pack hg filelog metadata
96 99 hgmeta = {}
97 100 for k in p.keys():
98 101 if k.startswith('x-hg-'):
99 102 name = k[len('x-hg-'):]
100 103 hgmeta[name] = p[k]
101 104 if hgmeta or text.startswith('\1\n'):
102 105 text = storageutil.packmeta(hgmeta, text)
103 106
104 107 return (text, True)
105 108
106 109 def writetostore(self, text):
107 110 # hg filelog metadata (includes rename, etc)
108 111 hgmeta, offset = storageutil.parsemeta(text)
109 112 if offset and offset > 0:
110 113 # lfs blob does not contain hg filelog metadata
111 114 text = text[offset:]
112 115
113 116 # git-lfs only supports sha256
114 117 oid = hex(hashlib.sha256(text).digest())
115 118 self.opener.lfslocalblobstore.write(oid, text)
116 119
117 120 # replace contents with metadata
118 121 longoid = 'sha256:%s' % oid
119 122 metadata = pointer.gitlfspointer(oid=longoid, size='%d' % len(text))
120 123
121 124 # by default, we expect the content to be binary. however, LFS could also
122 125 # be used for non-binary content. add a special entry for non-binary data.
123 126 # this will be used by filectx.isbinary().
124 127 if not stringutil.binary(text):
125 128 # not hg filelog metadata (affecting commit hash), no "x-hg-" prefix
126 129 metadata['x-is-binary'] = '0'
127 130
128 131 # translate hg filelog metadata to lfs metadata with "x-hg-" prefix
129 132 if hgmeta is not None:
130 133 for k, v in hgmeta.iteritems():
131 134 metadata['x-hg-%s' % k] = v
132 135
133 136 rawtext = metadata.serialize()
134 137 return (rawtext, False)
135 138
136 139 def _islfs(rlog, node=None, rev=None):
137 140 if rev is None:
138 141 if node is None:
139 142 # both None - likely working copy content where node is not ready
140 143 return False
141 144 rev = rlog._revlog.rev(node)
142 145 else:
143 146 node = rlog._revlog.node(rev)
144 147 if node == nullid:
145 148 return False
146 149 flags = rlog._revlog.flags(rev)
147 150 return bool(flags & revlog.REVIDX_EXTSTORED)
148 151
149 152 # Wrapping may also be applied by remotefilelog
150 153 def filelogaddrevision(orig, self, text, transaction, link, p1, p2,
151 154 cachedelta=None, node=None,
152 155 flags=revlog.REVIDX_DEFAULT_FLAGS, **kwds):
153 156 # The matcher isn't available if reposetup() wasn't called.
154 157 lfstrack = self._revlog.opener.options.get('lfstrack')
155 158
156 159 if lfstrack:
157 160 textlen = len(text)
158 161 # exclude hg rename meta from file size
159 162 meta, offset = storageutil.parsemeta(text)
160 163 if offset:
161 164 textlen -= offset
162 165
163 166 if lfstrack(self._revlog.filename, textlen):
164 167 flags |= revlog.REVIDX_EXTSTORED
165 168
166 169 return orig(self, text, transaction, link, p1, p2, cachedelta=cachedelta,
167 170 node=node, flags=flags, **kwds)
168 171
169 172 # Wrapping may also be applied by remotefilelog
170 173 def filelogrenamed(orig, self, node):
171 174 if _islfs(self, node):
172 175 rawtext = self._revlog.rawdata(node)
173 176 if not rawtext:
174 177 return False
175 178 metadata = pointer.deserialize(rawtext)
176 179 if 'x-hg-copy' in metadata and 'x-hg-copyrev' in metadata:
177 180 return metadata['x-hg-copy'], bin(metadata['x-hg-copyrev'])
178 181 else:
179 182 return False
180 183 return orig(self, node)
181 184
182 185 # Wrapping may also be applied by remotefilelog
183 186 def filelogsize(orig, self, rev):
184 187 if _islfs(self, rev=rev):
185 188 # fast path: use lfs metadata to answer size
186 189 rawtext = self._revlog.rawdata(rev)
187 190 metadata = pointer.deserialize(rawtext)
188 191 return int(metadata['size'])
189 192 return orig(self, rev)
190 193
191 194 @eh.wrapfunction(context.basefilectx, 'cmp')
192 195 def filectxcmp(orig, self, fctx):
193 196 """returns True if text is different than fctx"""
194 197 # some fctx (ex. hg-git) is not based on basefilectx and does not have islfs
195 198 if self.islfs() and getattr(fctx, 'islfs', lambda: False)():
196 199 # fast path: check LFS oid
197 200 p1 = pointer.deserialize(self.rawdata())
198 201 p2 = pointer.deserialize(fctx.rawdata())
199 202 return p1.oid() != p2.oid()
200 203 return orig(self, fctx)
201 204
202 205 @eh.wrapfunction(context.basefilectx, 'isbinary')
203 206 def filectxisbinary(orig, self):
204 207 if self.islfs():
205 208 # fast path: use lfs metadata to answer isbinary
206 209 metadata = pointer.deserialize(self.rawdata())
207 210 # if lfs metadata says nothing, assume it's binary by default
208 211 return bool(int(metadata.get('x-is-binary', 1)))
209 212 return orig(self)
210 213
211 214 def filectxislfs(self):
212 215 return _islfs(self.filelog(), self.filenode())
213 216
214 217 @eh.wrapfunction(cmdutil, '_updatecatformatter')
215 218 def _updatecatformatter(orig, fm, ctx, matcher, path, decode):
216 219 orig(fm, ctx, matcher, path, decode)
217 220 fm.data(rawdata=ctx[path].rawdata())
218 221
219 222 @eh.wrapfunction(scmutil, 'wrapconvertsink')
220 223 def convertsink(orig, sink):
221 224 sink = orig(sink)
222 225 if sink.repotype == 'hg':
223 226 class lfssink(sink.__class__):
224 227 def putcommit(self, files, copies, parents, commit, source, revmap,
225 228 full, cleanp2):
226 229 pc = super(lfssink, self).putcommit
227 230 node = pc(files, copies, parents, commit, source, revmap, full,
228 231 cleanp2)
229 232
230 233 if 'lfs' not in self.repo.requirements:
231 234 ctx = self.repo[node]
232 235
233 236 # The file list may contain removed files, so check for
234 237 # membership before assuming it is in the context.
235 238 if any(f in ctx and ctx[f].islfs() for f, n in files):
236 239 self.repo.requirements.add('lfs')
237 240 self.repo._writerequirements()
238 241
239 242 return node
240 243
241 244 sink.__class__ = lfssink
242 245
243 246 return sink
244 247
245 248 # bundlerepo uses "vfsmod.readonlyvfs(othervfs)", we need to make sure lfs
246 249 # options and blob stores are passed from othervfs to the new readonlyvfs.
247 250 @eh.wrapfunction(vfsmod.readonlyvfs, '__init__')
248 251 def vfsinit(orig, self, othervfs):
249 252 orig(self, othervfs)
250 253 # copy lfs related options
251 254 for k, v in othervfs.options.items():
252 255 if k.startswith('lfs'):
253 256 self.options[k] = v
254 257 # also copy lfs blobstores. note: this can run before reposetup, so lfs
255 258 # blobstore attributes are not always ready at this time.
256 259 for name in ['lfslocalblobstore', 'lfsremoteblobstore']:
257 260 if util.safehasattr(othervfs, name):
258 261 setattr(self, name, getattr(othervfs, name))
259 262
260 263 def _prefetchfiles(repo, revs, match):
261 264 """Ensure that required LFS blobs are present, fetching them as a group if
262 265 needed."""
263 266 if not util.safehasattr(repo.svfs, 'lfslocalblobstore'):
264 267 return
265 268
266 269 pointers = []
267 270 oids = set()
268 271 localstore = repo.svfs.lfslocalblobstore
269 272
270 273 for rev in revs:
271 274 ctx = repo[rev]
272 275 for f in ctx.walk(match):
273 276 p = pointerfromctx(ctx, f)
274 277 if p and p.oid() not in oids and not localstore.has(p.oid()):
275 278 p.filename = f
276 279 pointers.append(p)
277 280 oids.add(p.oid())
278 281
279 282 if pointers:
280 283 # Recalculating the repo store here allows 'paths.default' that is set
281 284 # on the repo by a clone command to be used for the update.
282 285 blobstore.remote(repo).readbatch(pointers, localstore)
283 286
284 287 def _canskipupload(repo):
285 288 # Skip if this hasn't been passed to reposetup()
286 289 if not util.safehasattr(repo.svfs, 'lfsremoteblobstore'):
287 290 return True
288 291
289 292 # if remotestore is a null store, upload is a no-op and can be skipped
290 293 return isinstance(repo.svfs.lfsremoteblobstore, blobstore._nullremote)
291 294
292 295 def candownload(repo):
293 296 # Skip if this hasn't been passed to reposetup()
294 297 if not util.safehasattr(repo.svfs, 'lfsremoteblobstore'):
295 298 return False
296 299
297 300 # if remotestore is a null store, downloads will lead to nothing
298 301 return not isinstance(repo.svfs.lfsremoteblobstore, blobstore._nullremote)
299 302
300 303 def uploadblobsfromrevs(repo, revs):
301 304 '''upload lfs blobs introduced by revs
302 305
303 306 Note: also used by other extensions e. g. infinitepush. avoid renaming.
304 307 '''
305 308 if _canskipupload(repo):
306 309 return
307 310 pointers = extractpointers(repo, revs)
308 311 uploadblobs(repo, pointers)
309 312
310 313 def prepush(pushop):
311 314 """Prepush hook.
312 315
313 316 Read through the revisions to push, looking for filelog entries that can be
314 317 deserialized into metadata so that we can block the push on their upload to
315 318 the remote blobstore.
316 319 """
317 320 return uploadblobsfromrevs(pushop.repo, pushop.outgoing.missing)
318 321
319 322 @eh.wrapfunction(exchange, 'push')
320 323 def push(orig, repo, remote, *args, **kwargs):
321 324 """bail on push if the extension isn't enabled on remote when needed, and
322 325 update the remote store based on the destination path."""
323 326 if 'lfs' in repo.requirements:
324 327 # If the remote peer is for a local repo, the requirement tests in the
325 328 # base class method enforce lfs support. Otherwise, some revisions in
326 329 # this repo use lfs, and the remote repo needs the extension loaded.
327 330 if not remote.local() and not remote.capable('lfs'):
328 331 # This is a copy of the message in exchange.push() when requirements
329 332 # are missing between local repos.
330 333 m = _("required features are not supported in the destination: %s")
331 334 raise error.Abort(m % 'lfs',
332 335 hint=_('enable the lfs extension on the server'))
333 336
334 337 # Repositories where this extension is disabled won't have the field.
335 338 # But if there's a requirement, then the extension must be loaded AND
336 339 # there may be blobs to push.
337 340 remotestore = repo.svfs.lfsremoteblobstore
338 341 try:
339 342 repo.svfs.lfsremoteblobstore = blobstore.remote(repo, remote.url())
340 343 return orig(repo, remote, *args, **kwargs)
341 344 finally:
342 345 repo.svfs.lfsremoteblobstore = remotestore
343 346 else:
344 347 return orig(repo, remote, *args, **kwargs)
345 348
346 349 # when writing a bundle via "hg bundle" command, upload related LFS blobs
347 350 @eh.wrapfunction(bundle2, 'writenewbundle')
348 351 def writenewbundle(orig, ui, repo, source, filename, bundletype, outgoing,
349 352 *args, **kwargs):
350 353 """upload LFS blobs added by outgoing revisions on 'hg bundle'"""
351 354 uploadblobsfromrevs(repo, outgoing.missing)
352 355 return orig(ui, repo, source, filename, bundletype, outgoing, *args,
353 356 **kwargs)
354 357
355 358 def extractpointers(repo, revs):
356 359 """return a list of lfs pointers added by given revs"""
357 360 repo.ui.debug('lfs: computing set of blobs to upload\n')
358 361 pointers = {}
359 362
360 363 makeprogress = repo.ui.makeprogress
361 364 with makeprogress(_('lfs search'), _('changesets'), len(revs)) as progress:
362 365 for r in revs:
363 366 ctx = repo[r]
364 367 for p in pointersfromctx(ctx).values():
365 368 pointers[p.oid()] = p
366 369 progress.increment()
367 370 return sorted(pointers.values(), key=lambda p: p.oid())
368 371
369 372 def pointerfromctx(ctx, f, removed=False):
370 373 """return a pointer for the named file from the given changectx, or None if
371 374 the file isn't LFS.
372 375
373 376 Optionally, the pointer for a file deleted from the context can be returned.
374 377 Since no such pointer is actually stored, and to distinguish from a non-LFS
375 378 file, this pointer is represented by an empty dict.
376 379 """
377 380 _ctx = ctx
378 381 if f not in ctx:
379 382 if not removed:
380 383 return None
381 384 if f in ctx.p1():
382 385 _ctx = ctx.p1()
383 386 elif f in ctx.p2():
384 387 _ctx = ctx.p2()
385 388 else:
386 389 return None
387 390 fctx = _ctx[f]
388 391 if not _islfs(fctx.filelog(), fctx.filenode()):
389 392 return None
390 393 try:
391 394 p = pointer.deserialize(fctx.rawdata())
392 395 if ctx == _ctx:
393 396 return p
394 397 return {}
395 398 except pointer.InvalidPointer as ex:
396 399 raise error.Abort(_('lfs: corrupted pointer (%s@%s): %s')
397 400 % (f, short(_ctx.node()), ex))
398 401
399 402 def pointersfromctx(ctx, removed=False):
400 403 """return a dict {path: pointer} for given single changectx.
401 404
402 405 If ``removed`` == True and the LFS file was removed from ``ctx``, the value
403 406 stored for the path is an empty dict.
404 407 """
405 408 result = {}
406 409 m = ctx.repo().narrowmatch()
407 410
408 411 # TODO: consider manifest.fastread() instead
409 412 for f in ctx.files():
410 413 if not m(f):
411 414 continue
412 415 p = pointerfromctx(ctx, f, removed=removed)
413 416 if p is not None:
414 417 result[f] = p
415 418 return result
416 419
417 420 def uploadblobs(repo, pointers):
418 421 """upload given pointers from local blobstore"""
419 422 if not pointers:
420 423 return
421 424
422 425 remoteblob = repo.svfs.lfsremoteblobstore
423 426 remoteblob.writebatch(pointers, repo.svfs.lfslocalblobstore)
424 427
425 428 @eh.wrapfunction(upgrade, '_finishdatamigration')
426 429 def upgradefinishdatamigration(orig, ui, srcrepo, dstrepo, requirements):
427 430 orig(ui, srcrepo, dstrepo, requirements)
428 431
429 432 # Skip if this hasn't been passed to reposetup()
430 433 if (util.safehasattr(srcrepo.svfs, 'lfslocalblobstore') and
431 434 util.safehasattr(dstrepo.svfs, 'lfslocalblobstore')):
432 435 srclfsvfs = srcrepo.svfs.lfslocalblobstore.vfs
433 436 dstlfsvfs = dstrepo.svfs.lfslocalblobstore.vfs
434 437
435 438 for dirpath, dirs, files in srclfsvfs.walk():
436 439 for oid in files:
437 440 ui.write(_('copying lfs blob %s\n') % oid)
438 441 lfutil.link(srclfsvfs.join(oid), dstlfsvfs.join(oid))
439 442
440 443 @eh.wrapfunction(upgrade, 'preservedrequirements')
441 444 @eh.wrapfunction(upgrade, 'supporteddestrequirements')
442 445 def upgraderequirements(orig, repo):
443 446 reqs = orig(repo)
444 447 if 'lfs' in repo.requirements:
445 448 reqs.add('lfs')
446 449 return reqs
@@ -1,71 +1,74
1 1 # __init__.py - narrowhg extension
2 2 #
3 3 # Copyright 2017 Google, Inc.
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7 '''create clones which fetch history data for subset of files (EXPERIMENTAL)'''
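As a rough usage sketch (the paths and URL here are hypothetical; the flags come from the clone wrapper added in narrowcommands.py later in this diff), a narrow clone that fetches history for only part of a repository might be created with:

    hg clone --narrow --include src/lib --exclude src/lib/tests https://example.com/repo

The --include/--exclude flags are only registered when the sparse extension is not also enabled, as the clone wrapper below shows.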
8 8
9 9 from __future__ import absolute_import
10 10
11 11 # Note for extension authors: ONLY specify testedwith = 'ships-with-hg-core' for
12 12 # extensions which SHIP WITH MERCURIAL. Non-mainline extensions should
13 13 # be specifying the version(s) of Mercurial they are tested with, or
14 14 # leave the attribute unspecified.
15 15 testedwith = 'ships-with-hg-core'
16 16
17 17 from mercurial import (
18 18 localrepo,
19 19 registrar,
20 )
21
22 from mercurial.interfaces import (
20 23 repository,
21 24 )
22 25
23 26 from . import (
24 27 narrowbundle2,
25 28 narrowcommands,
26 29 narrowrepo,
27 30 narrowtemplates,
28 31 narrowwirepeer,
29 32 )
30 33
31 34 configtable = {}
32 35 configitem = registrar.configitem(configtable)
33 36 # Narrowhg *has* support for serving ellipsis nodes (which are used at
34 37 # least by Google's internal server), but that support is pretty
35 38 # fragile and has a lot of problems on real-world repositories that
36 39 # have complex graph topologies. This could probably be corrected, but
37 40 # absent someone needing the full support for ellipsis nodes in
38 41 # repositories with merges, it's unlikely this work will get done. As
39 42 # of this writing in late 2017, all repositories large enough for
40 43 # ellipsis nodes to be a hard requirement also enforce strictly linear
41 44 # history for other scaling reasons.
42 45 configitem('experimental', 'narrowservebrokenellipses',
43 46 default=False,
44 47 alias=[('narrow', 'serveellipses')],
45 48 )
46 49
47 50 # Export the commands table for Mercurial to see.
48 51 cmdtable = narrowcommands.table
49 52
50 53 def featuresetup(ui, features):
51 54 features.add(repository.NARROW_REQUIREMENT)
52 55
53 56 def uisetup(ui):
54 57 """Wraps user-facing mercurial commands with narrow-aware versions."""
55 58 localrepo.featuresetupfuncs.add(featuresetup)
56 59 narrowbundle2.setup()
57 60 narrowcommands.setup()
58 61 narrowwirepeer.uisetup()
59 62
60 63 def reposetup(ui, repo):
61 64 """Wraps local repositories with narrow repo support."""
62 65 if not repo.local():
63 66 return
64 67
65 68 repo.ui.setconfig('experimental', 'narrow', True, 'narrow-ext')
66 69 if repository.NARROW_REQUIREMENT in repo.requirements:
67 70 narrowrepo.wraprepo(repo)
68 71 narrowwirepeer.reposetup(repo)
69 72
70 73 templatekeyword = narrowtemplates.templatekeyword
71 74 revsetpredicate = narrowtemplates.revsetpredicate
@@ -1,301 +1,303
1 1 # narrowbundle2.py - bundle2 extensions for narrow repository support
2 2 #
3 3 # Copyright 2017 Google, Inc.
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import errno
11 11 import struct
12 12
13 13 from mercurial.i18n import _
14 14 from mercurial.node import (
15 15 bin,
16 16 nullid,
17 17 )
18 18 from mercurial import (
19 19 bundle2,
20 20 changegroup,
21 21 error,
22 22 exchange,
23 23 localrepo,
24 24 narrowspec,
25 25 repair,
26 repository,
27 26 util,
28 27 wireprototypes,
29 28 )
29 from mercurial.interfaces import (
30 repository,
31 )
30 32 from mercurial.utils import (
31 33 stringutil,
32 34 )
33 35
34 36 _NARROWACL_SECTION = 'narrowacl'
35 37 _CHANGESPECPART = 'narrow:changespec'
36 38 _RESSPECS = 'narrow:responsespec'
37 39 _SPECPART = 'narrow:spec'
38 40 _SPECPART_INCLUDE = 'include'
39 41 _SPECPART_EXCLUDE = 'exclude'
40 42 _KILLNODESIGNAL = 'KILL'
41 43 _DONESIGNAL = 'DONE'
42 44 _ELIDEDCSHEADER = '>20s20s20sl' # cset id, p1, p2, len(text)
43 45 _ELIDEDMFHEADER = '>20s20s20s20sl' # manifest id, p1, p2, link id, len(text)
44 46 _CSHEADERSIZE = struct.calcsize(_ELIDEDCSHEADER)
45 47 _MFHEADERSIZE = struct.calcsize(_ELIDEDMFHEADER)
46 48
47 49 # Serve a changegroup for a client with a narrow clone.
48 50 def getbundlechangegrouppart_narrow(bundler, repo, source,
49 51 bundlecaps=None, b2caps=None, heads=None,
50 52 common=None, **kwargs):
51 53 assert repo.ui.configbool('experimental', 'narrowservebrokenellipses')
52 54
53 55 cgversions = b2caps.get('changegroup')
54 56 cgversions = [v for v in cgversions
55 57 if v in changegroup.supportedoutgoingversions(repo)]
56 58 if not cgversions:
57 59 raise ValueError(_('no common changegroup version'))
58 60 version = max(cgversions)
59 61
60 62 oldinclude = sorted(filter(bool, kwargs.get(r'oldincludepats', [])))
61 63 oldexclude = sorted(filter(bool, kwargs.get(r'oldexcludepats', [])))
62 64 newinclude = sorted(filter(bool, kwargs.get(r'includepats', [])))
63 65 newexclude = sorted(filter(bool, kwargs.get(r'excludepats', [])))
64 66 known = {bin(n) for n in kwargs.get(r'known', [])}
65 67 generateellipsesbundle2(bundler, repo, oldinclude, oldexclude, newinclude,
66 68 newexclude, version, common, heads, known,
67 69 kwargs.get(r'depth', None))
68 70
69 71 def generateellipsesbundle2(bundler, repo, oldinclude, oldexclude, newinclude,
70 72 newexclude, version, common, heads, known, depth):
71 73 newmatch = narrowspec.match(repo.root, include=newinclude,
72 74 exclude=newexclude)
73 75 if depth is not None:
74 76 depth = int(depth)
75 77 if depth < 1:
76 78 raise error.Abort(_('depth must be positive, got %d') % depth)
77 79
78 80 heads = set(heads or repo.heads())
79 81 common = set(common or [nullid])
80 82 if known and (oldinclude != newinclude or oldexclude != newexclude):
81 83 # Steps:
82 84 # 1. Send kill for "$known & ::common"
83 85 #
84 86 # 2. Send changegroup for ::common
85 87 #
86 88 # 3. Proceed.
87 89 #
88 90 # In the future, we can send kills for only the specific
89 91 # nodes we know should go away or change shape, and then
90 92 # send a data stream that tells the client something like this:
91 93 #
92 94 # a) apply this changegroup
93 95 # b) apply nodes XXX, YYY, ZZZ that you already have
94 96 # c) goto a
95 97 #
96 98 # until they've built up the full new state.
97 99 # Convert to revnums and intersect with "common". The client should
98 100 # have made it a subset of "common" already, but let's be safe.
99 101 known = set(repo.revs("%ln & ::%ln", known, common))
100 102 # TODO: we could send only roots() of this set, and the
101 103 # list of nodes in common, and the client could work out
102 104 # what to strip, instead of us explicitly sending every
103 105 # single node.
104 106 deadrevs = known
105 107 def genkills():
106 108 for r in deadrevs:
107 109 yield _KILLNODESIGNAL
108 110 yield repo.changelog.node(r)
109 111 yield _DONESIGNAL
110 112 bundler.newpart(_CHANGESPECPART, data=genkills())
111 113 newvisit, newfull, newellipsis = exchange._computeellipsis(
112 114 repo, set(), common, known, newmatch)
113 115 if newvisit:
114 116 packer = changegroup.getbundler(version, repo,
115 117 matcher=newmatch,
116 118 ellipses=True,
117 119 shallow=depth is not None,
118 120 ellipsisroots=newellipsis,
119 121 fullnodes=newfull)
120 122 cgdata = packer.generate(common, newvisit, False, 'narrow_widen')
121 123
122 124 part = bundler.newpart('changegroup', data=cgdata)
123 125 part.addparam('version', version)
124 126 if 'treemanifest' in repo.requirements:
125 127 part.addparam('treemanifest', '1')
126 128
127 129 visitnodes, relevant_nodes, ellipsisroots = exchange._computeellipsis(
128 130 repo, common, heads, set(), newmatch, depth=depth)
129 131
130 132 repo.ui.debug('Found %d relevant revs\n' % len(relevant_nodes))
131 133 if visitnodes:
132 134 packer = changegroup.getbundler(version, repo,
133 135 matcher=newmatch,
134 136 ellipses=True,
135 137 shallow=depth is not None,
136 138 ellipsisroots=ellipsisroots,
137 139 fullnodes=relevant_nodes)
138 140 cgdata = packer.generate(common, visitnodes, False, 'narrow_widen')
139 141
140 142 part = bundler.newpart('changegroup', data=cgdata)
141 143 part.addparam('version', version)
142 144 if 'treemanifest' in repo.requirements:
143 145 part.addparam('treemanifest', '1')
144 146
145 147 @bundle2.parthandler(_SPECPART, (_SPECPART_INCLUDE, _SPECPART_EXCLUDE))
146 148 def _handlechangespec_2(op, inpart):
147 149 # XXX: This bundle2 handling is buggy and should be removed after hg5.2 is
148 150 # released. New servers will send a mandatory bundle2 part named
149 151 # 'Narrowspec' and will send specs as data instead of params.
150 152 # Refer to issue5952 and 6019
151 153 includepats = set(inpart.params.get(_SPECPART_INCLUDE, '').splitlines())
152 154 excludepats = set(inpart.params.get(_SPECPART_EXCLUDE, '').splitlines())
153 155 narrowspec.validatepatterns(includepats)
154 156 narrowspec.validatepatterns(excludepats)
155 157
156 158 if repository.NARROW_REQUIREMENT not in op.repo.requirements:
157 159 op.repo.requirements.add(repository.NARROW_REQUIREMENT)
158 160 op.repo._writerequirements()
159 161 op.repo.setnarrowpats(includepats, excludepats)
160 162 narrowspec.copytoworkingcopy(op.repo)
161 163
162 164 @bundle2.parthandler(_RESSPECS)
163 165 def _handlenarrowspecs(op, inpart):
164 166 data = inpart.read()
165 167 inc, exc = data.split('\0')
166 168 includepats = set(inc.splitlines())
167 169 excludepats = set(exc.splitlines())
168 170 narrowspec.validatepatterns(includepats)
169 171 narrowspec.validatepatterns(excludepats)
170 172
171 173 if repository.NARROW_REQUIREMENT not in op.repo.requirements:
172 174 op.repo.requirements.add(repository.NARROW_REQUIREMENT)
173 175 op.repo._writerequirements()
174 176 op.repo.setnarrowpats(includepats, excludepats)
175 177 narrowspec.copytoworkingcopy(op.repo)
176 178
177 179 @bundle2.parthandler(_CHANGESPECPART)
178 180 def _handlechangespec(op, inpart):
179 181 repo = op.repo
180 182 cl = repo.changelog
181 183
182 184 # changesets which need to be stripped entirely. either they're no longer
183 185 # needed in the new narrow spec, or the server is sending a replacement
184 186 # in the changegroup part.
185 187 clkills = set()
186 188
187 189 # A changespec part contains all the updates to ellipsis nodes
188 190 # that will happen as a result of widening or narrowing a
189 191 # repo. All the changes that this block encounters are ellipsis
190 192 # nodes or flags to kill an existing ellipsis.
191 193 chunksignal = changegroup.readexactly(inpart, 4)
192 194 while chunksignal != _DONESIGNAL:
193 195 if chunksignal == _KILLNODESIGNAL:
194 196 # a node used to be an ellipsis but isn't anymore
195 197 ck = changegroup.readexactly(inpart, 20)
196 198 if cl.hasnode(ck):
197 199 clkills.add(ck)
198 200 else:
199 201 raise error.Abort(
200 202 _('unexpected changespec node chunk type: %s') % chunksignal)
201 203 chunksignal = changegroup.readexactly(inpart, 4)
202 204
203 205 if clkills:
204 206 # preserve bookmarks that repair.strip() would otherwise strip
205 207 op._bookmarksbackup = repo._bookmarks
206 208 class dummybmstore(dict):
207 209 def applychanges(self, repo, tr, changes):
208 210 pass
209 211 localrepo.localrepository._bookmarks.set(repo, dummybmstore())
210 212 chgrpfile = repair.strip(op.ui, repo, list(clkills), backup=True,
211 213 topic='widen')
212 214 if chgrpfile:
213 215 op._widen_uninterr = repo.ui.uninterruptible()
214 216 op._widen_uninterr.__enter__()
215 217 # presence of _widen_bundle attribute activates widen handler later
216 218 op._widen_bundle = chgrpfile
217 219 # Set the new narrowspec if we're widening. The setnewnarrowpats() method
218 220 # will currently always be there when using the core+narrowhg server, but
219 221 # other servers may include a changespec part even when not widening (e.g.
220 222 # because we're deepening a shallow repo).
221 223 if util.safehasattr(repo, 'setnewnarrowpats'):
222 224 repo.setnewnarrowpats()
223 225
224 226 def handlechangegroup_widen(op, inpart):
225 227 """Changegroup exchange handler which restores temporarily-stripped nodes"""
226 228 # We saved a bundle with stripped node data we must now restore.
227 229 # This approach is based on mercurial/repair.py@6ee26a53c111.
228 230 repo = op.repo
229 231 ui = op.ui
230 232
231 233 chgrpfile = op._widen_bundle
232 234 del op._widen_bundle
233 235 vfs = repo.vfs
234 236
235 237 ui.note(_("adding branch\n"))
236 238 f = vfs.open(chgrpfile, "rb")
237 239 try:
238 240 gen = exchange.readbundle(ui, f, chgrpfile, vfs)
239 241 if not ui.verbose:
240 242 # silence internal shuffling chatter
241 243 ui.pushbuffer()
242 244 if isinstance(gen, bundle2.unbundle20):
243 245 with repo.transaction('strip') as tr:
244 246 bundle2.processbundle(repo, gen, lambda: tr)
245 247 else:
246 248 gen.apply(repo, 'strip', 'bundle:' + vfs.join(chgrpfile), True)
247 249 if not ui.verbose:
248 250 ui.popbuffer()
249 251 finally:
250 252 f.close()
251 253
252 254 # remove undo files
253 255 for undovfs, undofile in repo.undofiles():
254 256 try:
255 257 undovfs.unlink(undofile)
256 258 except OSError as e:
257 259 if e.errno != errno.ENOENT:
258 260 ui.warn(_('error removing %s: %s\n') %
259 261 (undovfs.join(undofile), stringutil.forcebytestr(e)))
260 262
261 263 # Remove partial backup only if there were no exceptions
262 264 op._widen_uninterr.__exit__(None, None, None)
263 265 vfs.unlink(chgrpfile)
264 266
265 267 def setup():
266 268 """Enable narrow repo support in bundle2-related extension points."""
267 269 getbundleargs = wireprototypes.GETBUNDLE_ARGUMENTS
268 270
269 271 getbundleargs['narrow'] = 'boolean'
270 272 getbundleargs['depth'] = 'plain'
271 273 getbundleargs['oldincludepats'] = 'csv'
272 274 getbundleargs['oldexcludepats'] = 'csv'
273 275 getbundleargs['known'] = 'csv'
274 276
275 277 # Extend changegroup serving to handle requests from narrow clients.
276 278 origcgfn = exchange.getbundle2partsmapping['changegroup']
277 279 def wrappedcgfn(*args, **kwargs):
278 280 repo = args[1]
279 281 if repo.ui.has_section(_NARROWACL_SECTION):
280 282 kwargs = exchange.applynarrowacl(repo, kwargs)
281 283
282 284 if (kwargs.get(r'narrow', False) and
283 285 repo.ui.configbool('experimental', 'narrowservebrokenellipses')):
284 286 getbundlechangegrouppart_narrow(*args, **kwargs)
285 287 else:
286 288 origcgfn(*args, **kwargs)
287 289 exchange.getbundle2partsmapping['changegroup'] = wrappedcgfn
288 290
289 291 # Extend changegroup receiver so client can fixup after widen requests.
290 292 origcghandler = bundle2.parthandlermapping['changegroup']
291 293 def wrappedcghandler(op, inpart):
292 294 origcghandler(op, inpart)
293 295 if util.safehasattr(op, '_widen_bundle'):
294 296 handlechangegroup_widen(op, inpart)
295 297 if util.safehasattr(op, '_bookmarksbackup'):
296 298 localrepo.localrepository._bookmarks.set(op.repo,
297 299 op._bookmarksbackup)
298 300 del op._bookmarksbackup
299 301
300 302 wrappedcghandler.params = origcghandler.params
301 303 bundle2.parthandlermapping['changegroup'] = wrappedcghandler
@@ -1,478 +1,480
1 1 # narrowcommands.py - command modifications for narrowhg extension
2 2 #
3 3 # Copyright 2017 Google, Inc.
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7 from __future__ import absolute_import
8 8
9 9 import itertools
10 10 import os
11 11
12 12 from mercurial.i18n import _
13 13 from mercurial import (
14 14 bundle2,
15 15 cmdutil,
16 16 commands,
17 17 discovery,
18 18 encoding,
19 19 error,
20 20 exchange,
21 21 extensions,
22 22 hg,
23 23 narrowspec,
24 24 node,
25 25 pycompat,
26 26 registrar,
27 27 repair,
28 repository,
29 28 repoview,
30 29 sparse,
31 30 util,
32 31 wireprototypes,
33 32 )
33 from mercurial.interfaces import (
34 repository,
35 )
34 36
35 37 table = {}
36 38 command = registrar.command(table)
37 39
38 40 def setup():
39 41 """Wraps user-facing mercurial commands with narrow-aware versions."""
40 42
41 43 entry = extensions.wrapcommand(commands.table, 'clone', clonenarrowcmd)
42 44 entry[1].append(('', 'narrow', None,
43 45 _("create a narrow clone of select files")))
44 46 entry[1].append(('', 'depth', '',
45 47 _("limit the history fetched by distance from heads")))
46 48 entry[1].append(('', 'narrowspec', '',
47 49 _("read narrowspecs from file")))
48 50 # TODO(durin42): unify sparse/narrow --include/--exclude logic a bit
49 51 if 'sparse' not in extensions.enabled():
50 52 entry[1].append(('', 'include', [],
51 53 _("specifically fetch this file/directory")))
52 54 entry[1].append(
53 55 ('', 'exclude', [],
54 56 _("do not fetch this file/directory, even if included")))
55 57
56 58 entry = extensions.wrapcommand(commands.table, 'pull', pullnarrowcmd)
57 59 entry[1].append(('', 'depth', '',
58 60 _("limit the history fetched by distance from heads")))
59 61
60 62 extensions.wrapcommand(commands.table, 'archive', archivenarrowcmd)
61 63
62 64 def clonenarrowcmd(orig, ui, repo, *args, **opts):
63 65 """Wraps clone command, so 'hg clone' first wraps localrepo.clone()."""
64 66 opts = pycompat.byteskwargs(opts)
65 67 wrappedextraprepare = util.nullcontextmanager()
66 68 narrowspecfile = opts['narrowspec']
67 69
68 70 if narrowspecfile:
69 71 filepath = os.path.join(encoding.getcwd(), narrowspecfile)
70 72 ui.status(_("reading narrowspec from '%s'\n") % filepath)
71 73 try:
72 74 fdata = util.readfile(filepath)
73 75 except IOError as inst:
74 76 raise error.Abort(_("cannot read narrowspecs from '%s': %s") %
75 77 (filepath, encoding.strtolocal(inst.strerror)))
76 78
77 79 includes, excludes, profiles = sparse.parseconfig(ui, fdata, 'narrow')
78 80 if profiles:
79 81 raise error.Abort(_("cannot specify other files using '%include' in"
80 82 " narrowspec"))
81 83
82 84 narrowspec.validatepatterns(includes)
83 85 narrowspec.validatepatterns(excludes)
84 86
85 87 # narrowspec is passed so we should assume that the user wants a narrow clone
86 88 opts['narrow'] = True
87 89 opts['include'].extend(includes)
88 90 opts['exclude'].extend(excludes)
89 91
90 92 if opts['narrow']:
91 93 def pullbundle2extraprepare_widen(orig, pullop, kwargs):
92 94 orig(pullop, kwargs)
93 95
94 96 if opts.get('depth'):
95 97 kwargs['depth'] = opts['depth']
96 98 wrappedextraprepare = extensions.wrappedfunction(exchange,
97 99 '_pullbundle2extraprepare', pullbundle2extraprepare_widen)
98 100
99 101 with wrappedextraprepare:
100 102 return orig(ui, repo, *args, **pycompat.strkwargs(opts))
101 103
102 104 def pullnarrowcmd(orig, ui, repo, *args, **opts):
103 105 """Wraps pull command to allow modifying narrow spec."""
104 106 wrappedextraprepare = util.nullcontextmanager()
105 107 if repository.NARROW_REQUIREMENT in repo.requirements:
106 108
107 109 def pullbundle2extraprepare_widen(orig, pullop, kwargs):
108 110 orig(pullop, kwargs)
109 111 if opts.get(r'depth'):
110 112 kwargs['depth'] = opts[r'depth']
111 113 wrappedextraprepare = extensions.wrappedfunction(exchange,
112 114 '_pullbundle2extraprepare', pullbundle2extraprepare_widen)
113 115
114 116 with wrappedextraprepare:
115 117 return orig(ui, repo, *args, **opts)
116 118
117 119 def archivenarrowcmd(orig, ui, repo, *args, **opts):
118 120 """Wraps archive command to narrow the default includes."""
119 121 if repository.NARROW_REQUIREMENT in repo.requirements:
120 122 repo_includes, repo_excludes = repo.narrowpats
121 123 includes = set(opts.get(r'include', []))
122 124 excludes = set(opts.get(r'exclude', []))
123 125 includes, excludes, unused_invalid = narrowspec.restrictpatterns(
124 126 includes, excludes, repo_includes, repo_excludes)
125 127 if includes:
126 128 opts[r'include'] = includes
127 129 if excludes:
128 130 opts[r'exclude'] = excludes
129 131 return orig(ui, repo, *args, **opts)
130 132
131 133 def pullbundle2extraprepare(orig, pullop, kwargs):
132 134 repo = pullop.repo
133 135 if repository.NARROW_REQUIREMENT not in repo.requirements:
134 136 return orig(pullop, kwargs)
135 137
136 138 if wireprototypes.NARROWCAP not in pullop.remote.capabilities():
137 139 raise error.Abort(_("server does not support narrow clones"))
138 140 orig(pullop, kwargs)
139 141 kwargs['narrow'] = True
140 142 include, exclude = repo.narrowpats
141 143 kwargs['oldincludepats'] = include
142 144 kwargs['oldexcludepats'] = exclude
143 145 if include:
144 146 kwargs['includepats'] = include
145 147 if exclude:
146 148 kwargs['excludepats'] = exclude
147 149 # calculate known nodes only in ellipses cases because in non-ellipses cases
148 150 # we have all the nodes
149 151 if wireprototypes.ELLIPSESCAP1 in pullop.remote.capabilities():
150 152 kwargs['known'] = [node.hex(ctx.node()) for ctx in
151 153 repo.set('::%ln', pullop.common)
152 154 if ctx.node() != node.nullid]
153 155 if not kwargs['known']:
154 156 # Mercurial serializes an empty list as '' and deserializes it as
155 157 # [''], so delete it instead to avoid handling the empty string on
156 158 # the server.
157 159 del kwargs['known']
158 160
159 161 extensions.wrapfunction(exchange, '_pullbundle2extraprepare',
160 162 pullbundle2extraprepare)
161 163
162 164 def _narrow(ui, repo, remote, commoninc, oldincludes, oldexcludes,
163 165 newincludes, newexcludes, force):
164 166 oldmatch = narrowspec.match(repo.root, oldincludes, oldexcludes)
165 167 newmatch = narrowspec.match(repo.root, newincludes, newexcludes)
166 168
167 169 # This is essentially doing "hg outgoing" to find all local-only
168 170 # commits. We will then check that the local-only commits don't
169 171 # have any changes to files that will be untracked.
170 172 unfi = repo.unfiltered()
171 173 outgoing = discovery.findcommonoutgoing(unfi, remote,
172 174 commoninc=commoninc)
173 175 ui.status(_('looking for local changes to affected paths\n'))
174 176 localnodes = []
175 177 for n in itertools.chain(outgoing.missing, outgoing.excluded):
176 178 if any(oldmatch(f) and not newmatch(f) for f in unfi[n].files()):
177 179 localnodes.append(n)
178 180 revstostrip = unfi.revs('descendants(%ln)', localnodes)
179 181 hiddenrevs = repoview.filterrevs(repo, 'visible')
180 182 visibletostrip = list(repo.changelog.node(r)
181 183 for r in (revstostrip - hiddenrevs))
182 184 if visibletostrip:
183 185 ui.status(_('The following changeset(s) or their ancestors have '
184 186 'local changes not on the remote:\n'))
185 187 maxnodes = 10
186 188 if ui.verbose or len(visibletostrip) <= maxnodes:
187 189 for n in visibletostrip:
188 190 ui.status('%s\n' % node.short(n))
189 191 else:
190 192 for n in visibletostrip[:maxnodes]:
191 193 ui.status('%s\n' % node.short(n))
192 194 ui.status(_('...and %d more, use --verbose to list all\n') %
193 195 (len(visibletostrip) - maxnodes))
194 196 if not force:
195 197 raise error.Abort(_('local changes found'),
196 198 hint=_('use --force-delete-local-changes to '
197 199 'ignore'))
198 200
199 201 with ui.uninterruptible():
200 202 if revstostrip:
201 203 tostrip = [unfi.changelog.node(r) for r in revstostrip]
202 204 if repo['.'].node() in tostrip:
203 205 # stripping working copy, so move to a different commit first
204 206 urev = max(repo.revs('(::%n) - %ln + null',
205 207 repo['.'].node(), visibletostrip))
206 208 hg.clean(repo, urev)
207 209 overrides = {('devel', 'strip-obsmarkers'): False}
208 210 with ui.configoverride(overrides, 'narrow'):
209 211 repair.strip(ui, unfi, tostrip, topic='narrow')
210 212
211 213 todelete = []
212 214 for f, f2, size in repo.store.datafiles():
213 215 if f.startswith('data/'):
214 216 file = f[5:-2]
215 217 if not newmatch(file):
216 218 todelete.append(f)
217 219 elif f.startswith('meta/'):
218 220 dir = f[5:-13]
219 221 dirs = sorted(util.dirs({dir})) + [dir]
220 222 include = True
221 223 for d in dirs:
222 224 visit = newmatch.visitdir(d)
223 225 if not visit:
224 226 include = False
225 227 break
226 228 if visit == 'all':
227 229 break
228 230 if not include:
229 231 todelete.append(f)
230 232
231 233 repo.destroying()
232 234
233 235 with repo.transaction('narrowing'):
234 236 # Update narrowspec before removing revlogs, so repo won't be
235 237 # corrupt in case of crash
236 238 repo.setnarrowpats(newincludes, newexcludes)
237 239
238 240 for f in todelete:
239 241 ui.status(_('deleting %s\n') % f)
240 242 util.unlinkpath(repo.svfs.join(f))
241 243 repo.store.markremoved(f)
242 244
243 245 narrowspec.updateworkingcopy(repo, assumeclean=True)
244 246 narrowspec.copytoworkingcopy(repo)
245 247
246 248 repo.destroyed()
247 249
248 250 def _widen(ui, repo, remote, commoninc, oldincludes, oldexcludes,
249 251 newincludes, newexcludes):
250 252 # for now we assume that if a server has ellipses enabled, we will be
251 253 # exchanging ellipses nodes. In the future we should add ellipses as a client
252 254 # side requirement (maybe) to distinguish whether a client is shallow, and
253 255 # then tell the server whether we want ellipses or not.
254 256 # Theoretically a non-ellipses repo should be able to use narrow
255 257 # functionality from an ellipses enabled server
256 258 remotecap = remote.capabilities()
257 259 ellipsesremote = any(cap in remotecap
258 260 for cap in wireprototypes.SUPPORTED_ELLIPSESCAP)
259 261
260 262 # check whether we are talking to a server which supports old version of
261 263 # ellipses capabilities
262 264 isoldellipses = (ellipsesremote and wireprototypes.ELLIPSESCAP1 in
263 265 remotecap and wireprototypes.ELLIPSESCAP not in remotecap)
264 266
265 267 def pullbundle2extraprepare_widen(orig, pullop, kwargs):
266 268 orig(pullop, kwargs)
267 269 # The old{in,ex}cludepats have already been set by orig()
268 270 kwargs['includepats'] = newincludes
269 271 kwargs['excludepats'] = newexcludes
270 272 wrappedextraprepare = extensions.wrappedfunction(exchange,
271 273 '_pullbundle2extraprepare', pullbundle2extraprepare_widen)
272 274
273 275 # define a function that narrowbundle2 can call after creating the
274 276 # backup bundle, but before applying the bundle from the server
275 277 def setnewnarrowpats():
276 278 repo.setnarrowpats(newincludes, newexcludes)
277 279 repo.setnewnarrowpats = setnewnarrowpats
278 280 # silence the devel-warning of applying an empty changegroup
279 281 overrides = {('devel', 'all-warnings'): False}
280 282
281 283 common = commoninc[0]
282 284 with ui.uninterruptible():
283 285 if ellipsesremote:
284 286 ds = repo.dirstate
285 287 p1, p2 = ds.p1(), ds.p2()
286 288 with ds.parentchange():
287 289 ds.setparents(node.nullid, node.nullid)
288 290 if isoldellipses:
289 291 with wrappedextraprepare:
290 292 exchange.pull(repo, remote, heads=common)
291 293 else:
292 294 known = []
293 295 if ellipsesremote:
294 296 known = [node.hex(ctx.node()) for ctx in
295 297 repo.set('::%ln', common)
296 298 if ctx.node() != node.nullid]
297 299 with remote.commandexecutor() as e:
298 300 bundle = e.callcommand('narrow_widen', {
299 301 'oldincludes': oldincludes,
300 302 'oldexcludes': oldexcludes,
301 303 'newincludes': newincludes,
302 304 'newexcludes': newexcludes,
303 305 'cgversion': '03',
304 306 'commonheads': common,
305 307 'known': known,
306 308 'ellipses': ellipsesremote,
307 309 }).result()
308 310
309 311 trmanager = exchange.transactionmanager(repo, 'widen', remote.url())
310 312 with trmanager, repo.ui.configoverride(overrides, 'widen'):
311 313 op = bundle2.bundleoperation(repo, trmanager.transaction,
312 314 source='widen')
313 315 # TODO: we should catch error.Abort here
314 316 bundle2.processbundle(repo, bundle, op=op)
315 317
316 318 if ellipsesremote:
317 319 with ds.parentchange():
318 320 ds.setparents(p1, p2)
319 321
320 322 with repo.transaction('widening'):
321 323 repo.setnewnarrowpats()
322 324 narrowspec.updateworkingcopy(repo)
323 325 narrowspec.copytoworkingcopy(repo)
324 326
325 327 # TODO(rdamazio): Make new matcher format and update description
326 328 @command('tracked',
327 329 [('', 'addinclude', [], _('new paths to include')),
328 330 ('', 'removeinclude', [], _('old paths to no longer include')),
329 331 ('', 'addexclude', [], _('new paths to exclude')),
330 332 ('', 'import-rules', '', _('import narrowspecs from a file')),
331 333 ('', 'removeexclude', [], _('old paths to no longer exclude')),
332 334 ('', 'clear', False, _('whether to replace the existing narrowspec')),
333 335 ('', 'force-delete-local-changes', False,
334 336 _('forces deletion of local changes when narrowing')),
335 337 ('', 'update-working-copy', False,
336 338 _('update working copy when the store has changed')),
337 339 ] + commands.remoteopts,
338 340 _('[OPTIONS]... [REMOTE]'),
339 341 inferrepo=True)
340 342 def trackedcmd(ui, repo, remotepath=None, *pats, **opts):
341 343 """show or change the current narrowspec
342 344
343 345 With no argument, shows the current narrowspec entries, one per line. Each
344 346 line will be prefixed with 'I' or 'X' for included or excluded patterns,
345 347 respectively.
346 348
347 349 The narrowspec consists of expressions to match remote files and/or
348 350 directories that should be pulled into your client.
349 351 The narrowspec has *include* and *exclude* expressions, with excludes always
350 352 trumping includes: that is, if a file matches an exclude expression, it will
351 353 be excluded even if it also matches an include expression.
352 354 Excluding files that were never included has no effect.
353 355
354 356 Each included or excluded entry is in the format described by
355 357 'hg help patterns'.
356 358
357 359 The options allow you to add or remove included and excluded expressions.
358 360
359 361 If --clear is specified, then all previous includes and excludes are DROPPED
360 362 and replaced by the new ones specified to --addinclude and --addexclude.
361 363 If --clear is specified without any further options, the narrowspec will be
362 364 empty and will not match any files.
363 365
364 366 --import-rules accepts a path to a file containing rules, allowing you to
365 367 add --addinclude, --addexclude rules in bulk. Like the other include and
366 368 exclude switches, the changes are applied immediately.
367 369 """
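As an illustrative sketch (the paths are hypothetical), after something like 'hg tracked --addinclude src --addexclude src/tests', a later plain 'hg tracked' would print the narrowspec roughly as:

    I path:src
    X path:src/tests

where the 'I'/'X' status and the narrow.included/narrow.excluded labels correspond to the formatter calls further down in this function.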
368 370 opts = pycompat.byteskwargs(opts)
369 371 if repository.NARROW_REQUIREMENT not in repo.requirements:
370 372 raise error.Abort(_('the tracked command is only supported on '
371 373 'repositories cloned with --narrow'))
372 374
373 375 # Before supporting, decide whether "hg tracked --clear" should mean
374 376 # tracking no paths or all paths.
375 377 if opts['clear']:
376 378 raise error.Abort(_('the --clear option is not yet supported'))
377 379
378 380 # import rules from a file
379 381 newrules = opts.get('import_rules')
380 382 if newrules:
381 383 try:
382 384 filepath = os.path.join(encoding.getcwd(), newrules)
383 385 fdata = util.readfile(filepath)
384 386 except IOError as inst:
385 387 raise error.Abort(_("cannot read narrowspecs from '%s': %s") %
386 388 (filepath, encoding.strtolocal(inst.strerror)))
387 389 includepats, excludepats, profiles = sparse.parseconfig(ui, fdata,
388 390 'narrow')
389 391 if profiles:
390 392 raise error.Abort(_("including other spec files using '%include' "
391 393 "is not supported in narrowspec"))
392 394 opts['addinclude'].extend(includepats)
393 395 opts['addexclude'].extend(excludepats)
394 396
395 397 addedincludes = narrowspec.parsepatterns(opts['addinclude'])
396 398 removedincludes = narrowspec.parsepatterns(opts['removeinclude'])
397 399 addedexcludes = narrowspec.parsepatterns(opts['addexclude'])
398 400 removedexcludes = narrowspec.parsepatterns(opts['removeexclude'])
399 401
400 402 update_working_copy = opts['update_working_copy']
401 403 only_show = not (addedincludes or removedincludes or addedexcludes or
402 404 removedexcludes or newrules or update_working_copy)
403 405
404 406 oldincludes, oldexcludes = repo.narrowpats
405 407
406 408 # filter the user passed additions and deletions into actual additions and
407 409 # deletions of excludes and includes
408 410 addedincludes -= oldincludes
409 411 removedincludes &= oldincludes
410 412 addedexcludes -= oldexcludes
411 413 removedexcludes &= oldexcludes
412 414
413 415 widening = addedincludes or removedexcludes
414 416 narrowing = removedincludes or addedexcludes
415 417
416 418 # Only print the current narrowspec.
417 419 if only_show:
418 420 ui.pager('tracked')
419 421 fm = ui.formatter('narrow', opts)
420 422 for i in sorted(oldincludes):
421 423 fm.startitem()
422 424 fm.write('status', '%s ', 'I', label='narrow.included')
423 425 fm.write('pat', '%s\n', i, label='narrow.included')
424 426 for i in sorted(oldexcludes):
425 427 fm.startitem()
426 428 fm.write('status', '%s ', 'X', label='narrow.excluded')
427 429 fm.write('pat', '%s\n', i, label='narrow.excluded')
428 430 fm.end()
429 431 return 0
430 432
431 433 if update_working_copy:
432 434 with repo.wlock(), repo.lock(), repo.transaction('narrow-wc'):
433 435 narrowspec.updateworkingcopy(repo)
434 436 narrowspec.copytoworkingcopy(repo)
435 437 return 0
436 438
437 439 if not widening and not narrowing:
438 440 ui.status(_("nothing to widen or narrow\n"))
439 441 return 0
440 442
441 443 with repo.wlock(), repo.lock():
442 444 cmdutil.bailifchanged(repo)
443 445
444 446 # Find the revisions we have in common with the remote. These will
445 447 # be used for finding local-only changes for narrowing. They will
446 448 # also define the set of revisions to update for widening.
447 449 remotepath = ui.expandpath(remotepath or 'default')
448 450 url, branches = hg.parseurl(remotepath)
449 451 ui.status(_('comparing with %s\n') % util.hidepassword(url))
450 452 remote = hg.peer(repo, opts, url)
451 453
452 454 # check narrow support before doing anything if widening needs to be
453 455 # performed. In the future we should also abort if the client uses ellipses
454 456 # and the server does not support ellipses.
455 457 if widening and wireprototypes.NARROWCAP not in remote.capabilities():
456 458 raise error.Abort(_("server does not support narrow clones"))
457 459
458 460 commoninc = discovery.findcommonincoming(repo, remote)
459 461
460 462 if narrowing:
461 463 newincludes = oldincludes - removedincludes
462 464 newexcludes = oldexcludes | addedexcludes
463 465 _narrow(ui, repo, remote, commoninc, oldincludes, oldexcludes,
464 466 newincludes, newexcludes,
465 467 opts['force_delete_local_changes'])
466 468 # _narrow() updated the narrowspec and _widen() below needs to
467 469 # use the updated values as its base (otherwise removed includes
468 470 # and added excludes will be lost in the resulting narrowspec)
469 471 oldincludes = newincludes
470 472 oldexcludes = newexcludes
471 473
472 474 if widening:
473 475 newincludes = oldincludes | addedincludes
474 476 newexcludes = oldexcludes - removedexcludes
475 477 _widen(ui, repo, remote, commoninc, oldincludes, oldexcludes,
476 478 newincludes, newexcludes)
477 479
478 480 return 0
@@ -1,1174 +1,1176
1 1 # sqlitestore.py - Storage backend that uses SQLite
2 2 #
3 3 # Copyright 2018 Gregory Szorc <gregory.szorc@gmail.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """store repository data in SQLite (EXPERIMENTAL)
9 9
10 10 The sqlitestore extension enables the storage of repository data in SQLite.
11 11
12 12 This extension is HIGHLY EXPERIMENTAL. There are NO BACKWARDS COMPATIBILITY
13 13 GUARANTEES. This means that repositories created with this extension may
14 14 only be usable with the exact version of this extension/Mercurial that was
15 15 used. The extension attempts to enforce this in order to prevent repository
16 16 corruption.
17 17
18 18 In addition, several features are not yet supported or have known bugs:
19 19
20 20 * Only some data is stored in SQLite. Changeset, manifest, and other repository
21 21 data is not yet stored in SQLite.
22 22 * Transactions are not robust. If the process is aborted at the right time
23 23 during transaction close/rollback, the repository could be in an inconsistent
24 24 state. This problem will diminish once all repository data is tracked by
25 25 SQLite.
26 26 * Bundle repositories do not work (the ability to use e.g.
27 27 `hg -R <bundle-file> log` to automatically overlay a bundle on top of the
28 28 existing repository).
29 29 * Various other features don't work.
30 30
31 31 This extension should work for basic clone/pull, update, and commit workflows.
32 32 Some history rewriting operations may fail due to lack of support for bundle
33 33 repositories.
34 34
35 35 To use, activate the extension and set the ``storage.new-repo-backend`` config
36 36 option to ``sqlite`` to enable new repositories to use SQLite for storage.
37 37 """
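For reference, a minimal configuration matching the paragraph above (a sketch, not part of this change) would enable the extension and select the SQLite backend for newly created repositories:

    [extensions]
    sqlitestore =

    [storage]
    new-repo-backend = sqlite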
38 38
39 39 # To run the test suite with repos using SQLite by default, execute the
40 40 # following:
41 41 #
42 42 # HGREPOFEATURES="sqlitestore" run-tests.py \
43 43 # --extra-config-opt extensions.sqlitestore= \
44 44 # --extra-config-opt storage.new-repo-backend=sqlite
45 45
46 46 from __future__ import absolute_import
47 47
48 48 import hashlib
49 49 import sqlite3
50 50 import struct
51 51 import threading
52 52 import zlib
53 53
54 54 from mercurial.i18n import _
55 55 from mercurial.node import (
56 56 nullid,
57 57 nullrev,
58 58 short,
59 59 )
60 60 from mercurial.thirdparty import (
61 61 attr,
62 62 )
63 63 from mercurial import (
64 64 ancestor,
65 65 dagop,
66 66 encoding,
67 67 error,
68 68 extensions,
69 69 localrepo,
70 70 mdiff,
71 71 pycompat,
72 72 registrar,
73 repository,
74 73 util,
75 74 verify,
76 75 )
76 from mercurial.interfaces import (
77 repository,
78 )
77 79 from mercurial.utils import (
78 80 interfaceutil,
79 81 storageutil,
80 82 )
81 83
82 84 try:
83 85 from mercurial import zstd
84 86 zstd.__version__
85 87 except ImportError:
86 88 zstd = None
87 89
88 90 configtable = {}
89 91 configitem = registrar.configitem(configtable)
90 92
91 93 # experimental config: storage.sqlite.compression
92 94 configitem('storage', 'sqlite.compression',
93 95 default='zstd' if zstd else 'zlib',
94 96 experimental=True)
95 97
96 98 # Note for extension authors: ONLY specify testedwith = 'ships-with-hg-core' for
97 99 # extensions which SHIP WITH MERCURIAL. Non-mainline extensions should
98 100 # be specifying the version(s) of Mercurial they are tested with, or
99 101 # leave the attribute unspecified.
100 102 testedwith = 'ships-with-hg-core'
101 103
102 104 REQUIREMENT = b'exp-sqlite-001'
103 105 REQUIREMENT_ZSTD = b'exp-sqlite-comp-001=zstd'
104 106 REQUIREMENT_ZLIB = b'exp-sqlite-comp-001=zlib'
105 107 REQUIREMENT_NONE = b'exp-sqlite-comp-001=none'
106 108 REQUIREMENT_SHALLOW_FILES = b'exp-sqlite-shallow-files'
107 109
108 110 CURRENT_SCHEMA_VERSION = 1
109 111
110 112 COMPRESSION_NONE = 1
111 113 COMPRESSION_ZSTD = 2
112 114 COMPRESSION_ZLIB = 3
113 115
114 116 FLAG_CENSORED = 1
115 117 FLAG_MISSING_P1 = 2
116 118 FLAG_MISSING_P2 = 4
117 119
118 120 CREATE_SCHEMA = [
119 121 # Deltas are stored as content-indexed blobs.
120 122 # compression column holds COMPRESSION_* constant for how the
121 123 # delta is encoded.
122 124
123 125 r'CREATE TABLE delta ('
124 126 r' id INTEGER PRIMARY KEY, '
125 127 r' compression INTEGER NOT NULL, '
126 128 r' hash BLOB UNIQUE ON CONFLICT ABORT, '
127 129 r' delta BLOB NOT NULL '
128 130 r')',
129 131
130 132 # Tracked paths are denormalized to integers to avoid redundant
131 133 # storage of the path name.
132 134 r'CREATE TABLE filepath ('
133 135 r' id INTEGER PRIMARY KEY, '
134 136 r' path BLOB NOT NULL '
135 137 r')',
136 138
137 139 r'CREATE UNIQUE INDEX filepath_path '
138 140 r' ON filepath (path)',
139 141
140 142 # We have a single table for all file revision data.
141 143 # Each file revision is uniquely described by a (path, rev) and
142 144 # (path, node).
143 145 #
144 146 # Revision data is stored as a pointer to the delta producing this
145 147 # revision and the file revision whose delta should be applied before
146 148 # that one. One can reconstruct the delta chain by recursively following
147 149 # the delta base revision pointers until one encounters NULL.
148 150 #
149 151 # flags column holds bitwise integer flags controlling storage options.
150 152 # These flags are defined by the FLAG_* constants.
151 153 r'CREATE TABLE fileindex ('
152 154 r' id INTEGER PRIMARY KEY, '
153 155 r' pathid INTEGER REFERENCES filepath(id), '
154 156 r' revnum INTEGER NOT NULL, '
155 157 r' p1rev INTEGER NOT NULL, '
156 158 r' p2rev INTEGER NOT NULL, '
157 159 r' linkrev INTEGER NOT NULL, '
158 160 r' flags INTEGER NOT NULL, '
159 161 r' deltaid INTEGER REFERENCES delta(id), '
160 162 r' deltabaseid INTEGER REFERENCES fileindex(id), '
161 163 r' node BLOB NOT NULL '
162 164 r')',
163 165
164 166 r'CREATE UNIQUE INDEX fileindex_pathrevnum '
165 167 r' ON fileindex (pathid, revnum)',
166 168
167 169 r'CREATE UNIQUE INDEX fileindex_pathnode '
168 170 r' ON fileindex (pathid, node)',
169 171
170 172 # Provide a view over all file data for convenience.
171 173 r'CREATE VIEW filedata AS '
172 174 r'SELECT '
173 175 r' fileindex.id AS id, '
174 176 r' filepath.id AS pathid, '
175 177 r' filepath.path AS path, '
176 178 r' fileindex.revnum AS revnum, '
177 179 r' fileindex.node AS node, '
178 180 r' fileindex.p1rev AS p1rev, '
179 181 r' fileindex.p2rev AS p2rev, '
180 182 r' fileindex.linkrev AS linkrev, '
181 183 r' fileindex.flags AS flags, '
182 184 r' fileindex.deltaid AS deltaid, '
183 185 r' fileindex.deltabaseid AS deltabaseid '
184 186 r'FROM filepath, fileindex '
185 187 r'WHERE fileindex.pathid=filepath.id',
186 188
187 189 r'PRAGMA user_version=%d' % CURRENT_SCHEMA_VERSION,
188 190 ]
189 191
190 192 def resolvedeltachain(db, pathid, node, revisioncache,
191 193 stoprids, zstddctx=None):
192 194 """Resolve a delta chain for a file node."""
193 195
194 196 # TODO the "not in ({stops})" here is possibly slowing down the query
195 197 # because it needs to perform the lookup on every recursive invocation.
196 198 # This could possibly be faster if we created a temporary query with
197 199 # baseid "poisoned" to null and limited the recursive filter to
198 200 # "is not null".
199 201 res = db.execute(
200 202 r'WITH RECURSIVE '
201 203 r' deltachain(deltaid, baseid) AS ('
202 204 r' SELECT deltaid, deltabaseid FROM fileindex '
203 205 r' WHERE pathid=? AND node=? '
204 206 r' UNION ALL '
205 207 r' SELECT fileindex.deltaid, deltabaseid '
206 208 r' FROM fileindex, deltachain '
207 209 r' WHERE '
208 210 r' fileindex.id=deltachain.baseid '
209 211 r' AND deltachain.baseid IS NOT NULL '
210 212 r' AND fileindex.id NOT IN ({stops}) '
211 213 r' ) '
212 214 r'SELECT deltachain.baseid, compression, delta '
213 215 r'FROM deltachain, delta '
214 216 r'WHERE delta.id=deltachain.deltaid'.format(
215 217 stops=r','.join([r'?'] * len(stoprids))),
216 218 tuple([pathid, node] + list(stoprids.keys())))
217 219
218 220 deltas = []
219 221 lastdeltabaseid = None
220 222
221 223 for deltabaseid, compression, delta in res:
222 224 lastdeltabaseid = deltabaseid
223 225
224 226 if compression == COMPRESSION_ZSTD:
225 227 delta = zstddctx.decompress(delta)
226 228 elif compression == COMPRESSION_NONE:
227 229 delta = delta
228 230 elif compression == COMPRESSION_ZLIB:
229 231 delta = zlib.decompress(delta)
230 232 else:
231 233 raise SQLiteStoreError('unhandled compression type: %d' %
232 234 compression)
233 235
234 236 deltas.append(delta)
235 237
236 238 if lastdeltabaseid in stoprids:
237 239 basetext = revisioncache[stoprids[lastdeltabaseid]]
238 240 else:
239 241 basetext = deltas.pop()
240 242
241 243 deltas.reverse()
242 244 fulltext = mdiff.patches(basetext, deltas)
243 245
244 246 # SQLite returns buffer instances for blob columns on Python 2. This
245 247 # type can propagate through the delta application layer. Because
246 248 # downstream callers assume revisions are bytes, cast as needed.
247 249 if not isinstance(fulltext, bytes):
248 250 fulltext = bytes(delta)
249 251
250 252 return fulltext
251 253
252 254 def insertdelta(db, compression, hash, delta):
253 255 try:
254 256 return db.execute(
255 257 r'INSERT INTO delta (compression, hash, delta) '
256 258 r'VALUES (?, ?, ?)',
257 259 (compression, hash, delta)).lastrowid
258 260 except sqlite3.IntegrityError:
259 261 return db.execute(
260 262 r'SELECT id FROM delta WHERE hash=?',
261 263 (hash,)).fetchone()[0]
262 264
263 265 class SQLiteStoreError(error.StorageError):
264 266 pass
265 267
266 268 @attr.s
267 269 class revisionentry(object):
268 270 rid = attr.ib()
269 271 rev = attr.ib()
270 272 node = attr.ib()
271 273 p1rev = attr.ib()
272 274 p2rev = attr.ib()
273 275 p1node = attr.ib()
274 276 p2node = attr.ib()
275 277 linkrev = attr.ib()
276 278 flags = attr.ib()
277 279
278 280 @interfaceutil.implementer(repository.irevisiondelta)
279 281 @attr.s(slots=True)
280 282 class sqliterevisiondelta(object):
281 283 node = attr.ib()
282 284 p1node = attr.ib()
283 285 p2node = attr.ib()
284 286 basenode = attr.ib()
285 287 flags = attr.ib()
286 288 baserevisionsize = attr.ib()
287 289 revision = attr.ib()
288 290 delta = attr.ib()
289 291 linknode = attr.ib(default=None)
290 292
291 293 @interfaceutil.implementer(repository.iverifyproblem)
292 294 @attr.s(frozen=True)
293 295 class sqliteproblem(object):
294 296 warning = attr.ib(default=None)
295 297 error = attr.ib(default=None)
296 298 node = attr.ib(default=None)
297 299
298 300 @interfaceutil.implementer(repository.ifilestorage)
299 301 class sqlitefilestore(object):
300 302 """Implements storage for an individual tracked path."""
301 303
302 304 def __init__(self, db, path, compression):
303 305 self._db = db
304 306 self._path = path
305 307
306 308 self._pathid = None
307 309
308 310 # revnum -> node
309 311 self._revtonode = {}
310 312 # node -> revnum
311 313 self._nodetorev = {}
312 314 # node -> data structure
313 315 self._revisions = {}
314 316
315 317 self._revisioncache = util.lrucachedict(10)
316 318
317 319 self._compengine = compression
318 320
319 321 if compression == 'zstd':
320 322 self._cctx = zstd.ZstdCompressor(level=3)
321 323 self._dctx = zstd.ZstdDecompressor()
322 324 else:
323 325 self._cctx = None
324 326 self._dctx = None
325 327
326 328 self._refreshindex()
327 329
328 330 def _refreshindex(self):
329 331 self._revtonode = {}
330 332 self._nodetorev = {}
331 333 self._revisions = {}
332 334
333 335 res = list(self._db.execute(
334 336 r'SELECT id FROM filepath WHERE path=?', (self._path,)))
335 337
336 338 if not res:
337 339 self._pathid = None
338 340 return
339 341
340 342 self._pathid = res[0][0]
341 343
342 344 res = self._db.execute(
343 345 r'SELECT id, revnum, node, p1rev, p2rev, linkrev, flags '
344 346 r'FROM fileindex '
345 347 r'WHERE pathid=? '
346 348 r'ORDER BY revnum ASC',
347 349 (self._pathid,))
348 350
349 351 for i, row in enumerate(res):
350 352 rid, rev, node, p1rev, p2rev, linkrev, flags = row
351 353
352 354 if i != rev:
353 355 raise SQLiteStoreError(_('sqlite database has inconsistent '
354 356 'revision numbers'))
355 357
356 358 if p1rev == nullrev:
357 359 p1node = nullid
358 360 else:
359 361 p1node = self._revtonode[p1rev]
360 362
361 363 if p2rev == nullrev:
362 364 p2node = nullid
363 365 else:
364 366 p2node = self._revtonode[p2rev]
365 367
366 368 entry = revisionentry(
367 369 rid=rid,
368 370 rev=rev,
369 371 node=node,
370 372 p1rev=p1rev,
371 373 p2rev=p2rev,
372 374 p1node=p1node,
373 375 p2node=p2node,
374 376 linkrev=linkrev,
375 377 flags=flags)
376 378
377 379 self._revtonode[rev] = node
378 380 self._nodetorev[node] = rev
379 381 self._revisions[node] = entry
380 382
381 383 # Start of ifileindex interface.
382 384
383 385 def __len__(self):
384 386 return len(self._revisions)
385 387
386 388 def __iter__(self):
387 389 return iter(pycompat.xrange(len(self._revisions)))
388 390
389 391 def hasnode(self, node):
390 392 if node == nullid:
391 393 return False
392 394
393 395 return node in self._nodetorev
394 396
395 397 def revs(self, start=0, stop=None):
396 398 return storageutil.iterrevs(len(self._revisions), start=start,
397 399 stop=stop)
398 400
399 401 def parents(self, node):
400 402 if node == nullid:
401 403 return nullid, nullid
402 404
403 405 if node not in self._revisions:
404 406 raise error.LookupError(node, self._path, _('no node'))
405 407
406 408 entry = self._revisions[node]
407 409 return entry.p1node, entry.p2node
408 410
409 411 def parentrevs(self, rev):
410 412 if rev == nullrev:
411 413 return nullrev, nullrev
412 414
413 415 if rev not in self._revtonode:
414 416 raise IndexError(rev)
415 417
416 418 entry = self._revisions[self._revtonode[rev]]
417 419 return entry.p1rev, entry.p2rev
418 420
419 421 def rev(self, node):
420 422 if node == nullid:
421 423 return nullrev
422 424
423 425 if node not in self._nodetorev:
424 426 raise error.LookupError(node, self._path, _('no node'))
425 427
426 428 return self._nodetorev[node]
427 429
428 430 def node(self, rev):
429 431 if rev == nullrev:
430 432 return nullid
431 433
432 434 if rev not in self._revtonode:
433 435 raise IndexError(rev)
434 436
435 437 return self._revtonode[rev]
436 438
437 439 def lookup(self, node):
438 440 return storageutil.fileidlookup(self, node, self._path)
439 441
440 442 def linkrev(self, rev):
441 443 if rev == nullrev:
442 444 return nullrev
443 445
444 446 if rev not in self._revtonode:
445 447 raise IndexError(rev)
446 448
447 449 entry = self._revisions[self._revtonode[rev]]
448 450 return entry.linkrev
449 451
450 452 def iscensored(self, rev):
451 453 if rev == nullrev:
452 454 return False
453 455
454 456 if rev not in self._revtonode:
455 457 raise IndexError(rev)
456 458
457 459 return self._revisions[self._revtonode[rev]].flags & FLAG_CENSORED
458 460
459 461 def commonancestorsheads(self, node1, node2):
460 462 rev1 = self.rev(node1)
461 463 rev2 = self.rev(node2)
462 464
463 465 ancestors = ancestor.commonancestorsheads(self.parentrevs, rev1, rev2)
464 466 return pycompat.maplist(self.node, ancestors)
465 467
466 468 def descendants(self, revs):
467 469 # TODO we could implement this using a recursive SQL query, which
468 470 # might be faster.
469 471 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
470 472
471 473 def heads(self, start=None, stop=None):
472 474 if start is None and stop is None:
473 475 if not len(self):
474 476 return [nullid]
475 477
476 478 startrev = self.rev(start) if start is not None else nullrev
477 479 stoprevs = {self.rev(n) for n in stop or []}
478 480
479 481 revs = dagop.headrevssubset(self.revs, self.parentrevs,
480 482 startrev=startrev, stoprevs=stoprevs)
481 483
482 484 return [self.node(rev) for rev in revs]
483 485
484 486 def children(self, node):
485 487 rev = self.rev(node)
486 488
487 489 res = self._db.execute(
488 490 r'SELECT'
489 491 r' node '
490 492 r' FROM filedata '
491 493 r' WHERE path=? AND (p1rev=? OR p2rev=?) '
492 494 r' ORDER BY revnum ASC',
493 495 (self._path, rev, rev))
494 496
495 497 return [row[0] for row in res]
496 498
497 499 # End of ifileindex interface.
498 500
499 501 # Start of ifiledata interface.
500 502
501 503 def size(self, rev):
502 504 if rev == nullrev:
503 505 return 0
504 506
505 507 if rev not in self._revtonode:
506 508 raise IndexError(rev)
507 509
508 510 node = self._revtonode[rev]
509 511
510 512 if self.renamed(node):
511 513 return len(self.read(node))
512 514
513 515 return len(self.revision(node))
514 516
515 517 def revision(self, node, raw=False, _verifyhash=True):
516 518 if node in (nullid, nullrev):
517 519 return b''
518 520
519 521 if isinstance(node, int):
520 522 node = self.node(node)
521 523
522 524 if node not in self._nodetorev:
523 525 raise error.LookupError(node, self._path, _('no node'))
524 526
525 527 if node in self._revisioncache:
526 528 return self._revisioncache[node]
527 529
528 530 # Because we have a fulltext revision cache, we are able to
529 531 # short-circuit delta chain traversal and decompression as soon as
530 532 # we encounter a revision in the cache.
531 533
532 534 stoprids = {self._revisions[n].rid: n
533 535 for n in self._revisioncache}
534 536
535 537 if not stoprids:
536 538 stoprids[-1] = None
537 539
538 540 fulltext = resolvedeltachain(self._db, self._pathid, node,
539 541 self._revisioncache, stoprids,
540 542 zstddctx=self._dctx)
541 543
542 544 # Don't verify hashes if parent nodes were rewritten, as the hash
543 545 # wouldn't verify.
544 546 if self._revisions[node].flags & (FLAG_MISSING_P1 | FLAG_MISSING_P2):
545 547 _verifyhash = False
546 548
547 549 if _verifyhash:
548 550 self._checkhash(fulltext, node)
549 551 self._revisioncache[node] = fulltext
550 552
551 553 return fulltext
552 554
553 555 def rawdata(self, *args, **kwargs):
554 556 return self.revision(*args, **kwargs)
555 557
556 558 def read(self, node):
557 559 return storageutil.filtermetadata(self.revision(node))
558 560
559 561 def renamed(self, node):
560 562 return storageutil.filerevisioncopied(self, node)
561 563
562 564 def cmp(self, node, fulltext):
563 565 return not storageutil.filedataequivalent(self, node, fulltext)
564 566
565 567 def emitrevisions(self, nodes, nodesorder=None, revisiondata=False,
566 568 assumehaveparentrevisions=False,
567 569 deltamode=repository.CG_DELTAMODE_STD):
568 570 if nodesorder not in ('nodes', 'storage', 'linear', None):
569 571 raise error.ProgrammingError('unhandled value for nodesorder: %s' %
570 572 nodesorder)
571 573
572 574 nodes = [n for n in nodes if n != nullid]
573 575
574 576 if not nodes:
575 577 return
576 578
577 579 # TODO perform in a single query.
578 580 res = self._db.execute(
579 581 r'SELECT revnum, deltaid FROM fileindex '
580 582 r'WHERE pathid=? '
581 583 r' AND node in (%s)' % (r','.join([r'?'] * len(nodes))),
582 584 tuple([self._pathid] + nodes))
583 585
584 586 deltabases = {}
585 587
586 588 for rev, deltaid in res:
587 589 res = self._db.execute(
588 590 r'SELECT revnum from fileindex WHERE pathid=? AND deltaid=?',
589 591 (self._pathid, deltaid))
590 592 deltabases[rev] = res.fetchone()[0]
591 593
592 594 # TODO define revdifffn so we can use delta from storage.
593 595 for delta in storageutil.emitrevisions(
594 596 self, nodes, nodesorder, sqliterevisiondelta,
595 597 deltaparentfn=deltabases.__getitem__,
596 598 revisiondata=revisiondata,
597 599 assumehaveparentrevisions=assumehaveparentrevisions,
598 600 deltamode=deltamode):
599 601
600 602 yield delta
601 603
602 604 # End of ifiledata interface.
603 605
604 606 # Start of ifilemutation interface.
605 607
606 608 def add(self, filedata, meta, transaction, linkrev, p1, p2):
607 609 if meta or filedata.startswith(b'\x01\n'):
608 610 filedata = storageutil.packmeta(meta, filedata)
609 611
610 612 return self.addrevision(filedata, transaction, linkrev, p1, p2)
611 613
612 614 def addrevision(self, revisiondata, transaction, linkrev, p1, p2, node=None,
613 615 flags=0, cachedelta=None):
614 616 if flags:
615 617 raise SQLiteStoreError(_('flags not supported on revisions'))
616 618
617 619 validatehash = node is not None
618 620 node = node or storageutil.hashrevisionsha1(revisiondata, p1, p2)
619 621
620 622 if validatehash:
621 623 self._checkhash(revisiondata, node, p1, p2)
622 624
623 625 if node in self._nodetorev:
624 626 return node
625 627
626 628 node = self._addrawrevision(node, revisiondata, transaction, linkrev,
627 629 p1, p2)
628 630
629 631 self._revisioncache[node] = revisiondata
630 632 return node
631 633
632 634 def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None,
633 635 maybemissingparents=False):
634 636 nodes = []
635 637
636 638 for node, p1, p2, linknode, deltabase, delta, wireflags in deltas:
637 639 storeflags = 0
638 640
639 641 if wireflags & repository.REVISION_FLAG_CENSORED:
640 642 storeflags |= FLAG_CENSORED
641 643
642 644 if wireflags & ~repository.REVISION_FLAG_CENSORED:
643 645 raise SQLiteStoreError('unhandled revision flag')
644 646
645 647 if maybemissingparents:
646 648 if p1 != nullid and not self.hasnode(p1):
647 649 p1 = nullid
648 650 storeflags |= FLAG_MISSING_P1
649 651
650 652 if p2 != nullid and not self.hasnode(p2):
651 653 p2 = nullid
652 654 storeflags |= FLAG_MISSING_P2
653 655
654 656 baserev = self.rev(deltabase)
655 657
656 658 # If base is censored, delta must be full replacement in a single
657 659 # patch operation.
658 660 if baserev != nullrev and self.iscensored(baserev):
659 661 hlen = struct.calcsize('>lll')
660 662 oldlen = len(self.rawdata(deltabase, _verifyhash=False))
661 663 newlen = len(delta) - hlen
662 664
663 665 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
664 666 raise error.CensoredBaseError(self._path,
665 667 deltabase)
666 668
667 669 if (not (storeflags & FLAG_CENSORED)
668 670 and storageutil.deltaiscensored(
669 671 delta, baserev, lambda x: len(self.rawdata(x)))):
670 672 storeflags |= FLAG_CENSORED
671 673
672 674 linkrev = linkmapper(linknode)
673 675
674 676 nodes.append(node)
675 677
676 678 if node in self._revisions:
677 679 # Possibly reset parents to make them proper.
678 680 entry = self._revisions[node]
679 681
680 682 if entry.flags & FLAG_MISSING_P1 and p1 != nullid:
681 683 entry.p1node = p1
682 684 entry.p1rev = self._nodetorev[p1]
683 685 entry.flags &= ~FLAG_MISSING_P1
684 686
685 687 self._db.execute(
686 688 r'UPDATE fileindex SET p1rev=?, flags=? '
687 689 r'WHERE id=?',
688 690 (self._nodetorev[p1], entry.flags, entry.rid))
689 691
690 692 if entry.flags & FLAG_MISSING_P2 and p2 != nullid:
691 693 entry.p2node = p2
692 694 entry.p2rev = self._nodetorev[p2]
693 695 entry.flags &= ~FLAG_MISSING_P2
694 696
695 697 self._db.execute(
696 698 r'UPDATE fileindex SET p2rev=?, flags=? '
697 699 r'WHERE id=?',
698 700 (self._nodetorev[p2], entry.flags, entry.rid))
699 701
700 702 continue
701 703
702 704 if deltabase == nullid:
703 705 text = mdiff.patch(b'', delta)
704 706 storedelta = None
705 707 else:
706 708 text = None
707 709 storedelta = (deltabase, delta)
708 710
709 711 self._addrawrevision(node, text, transaction, linkrev, p1, p2,
710 712 storedelta=storedelta, flags=storeflags)
711 713
712 714 if addrevisioncb:
713 715 addrevisioncb(self, node)
714 716
715 717 return nodes
716 718
717 719 def censorrevision(self, tr, censornode, tombstone=b''):
718 720 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
719 721
720 722 # This restriction is cargo culted from revlogs and makes no sense for
721 723 # SQLite, since columns can be resized at will.
722 724 if len(tombstone) > len(self.rawdata(censornode)):
723 725 raise error.Abort(_('censor tombstone must be no longer than '
724 726 'censored data'))
725 727
726 728 # We need to replace the censored revision's data with the tombstone.
727 729 # But replacing that data will have implications for delta chains that
728 730 # reference it.
729 731 #
730 732 # While "better," more complex strategies are possible, we do something
731 733 # simple: we find delta chain children of the censored revision and we
732 734 # replace those incremental deltas with fulltexts of their corresponding
733 735 # revision. Then we delete the now-unreferenced delta and original
734 736 # revision and insert a replacement.
735 737
736 738 # Find the delta to be censored.
737 739 censoreddeltaid = self._db.execute(
738 740 r'SELECT deltaid FROM fileindex WHERE id=?',
739 741 (self._revisions[censornode].rid,)).fetchone()[0]
740 742
741 743 # Find all its delta chain children.
742 744 # TODO once we support storing deltas for !files, we'll need to look
743 745 # for those delta chains too.
744 746 rows = list(self._db.execute(
745 747 r'SELECT id, pathid, node FROM fileindex '
746 748 r'WHERE deltabaseid=? OR deltaid=?',
747 749 (censoreddeltaid, censoreddeltaid)))
748 750
749 751 for row in rows:
750 752 rid, pathid, node = row
751 753
752 754 fulltext = resolvedeltachain(self._db, pathid, node, {}, {-1: None},
753 755 zstddctx=self._dctx)
754 756
755 757 deltahash = hashlib.sha1(fulltext).digest()
756 758
757 759 if self._compengine == 'zstd':
758 760 deltablob = self._cctx.compress(fulltext)
759 761 compression = COMPRESSION_ZSTD
760 762 elif self._compengine == 'zlib':
761 763 deltablob = zlib.compress(fulltext)
762 764 compression = COMPRESSION_ZLIB
763 765 elif self._compengine == 'none':
764 766 deltablob = fulltext
765 767 compression = COMPRESSION_NONE
766 768 else:
767 769 raise error.ProgrammingError('unhandled compression engine: %s'
768 770 % self._compengine)
769 771
770 772 if len(deltablob) >= len(fulltext):
771 773 deltablob = fulltext
772 774 compression = COMPRESSION_NONE
773 775
774 776 deltaid = insertdelta(self._db, compression, deltahash, deltablob)
775 777
776 778 self._db.execute(
777 779 r'UPDATE fileindex SET deltaid=?, deltabaseid=NULL '
778 780 r'WHERE id=?', (deltaid, rid))
779 781
780 782 # Now create the tombstone delta and replace the delta on the censored
781 783 # node.
782 784 deltahash = hashlib.sha1(tombstone).digest()
783 785 tombstonedeltaid = insertdelta(self._db, COMPRESSION_NONE,
784 786 deltahash, tombstone)
785 787
786 788 flags = self._revisions[censornode].flags
787 789 flags |= FLAG_CENSORED
788 790
789 791 self._db.execute(
790 792 r'UPDATE fileindex SET flags=?, deltaid=?, deltabaseid=NULL '
791 793 r'WHERE pathid=? AND node=?',
792 794 (flags, tombstonedeltaid, self._pathid, censornode))
793 795
794 796 self._db.execute(
795 797 r'DELETE FROM delta WHERE id=?', (censoreddeltaid,))
796 798
797 799 self._refreshindex()
798 800 self._revisioncache.clear()
799 801
800 802 def getstrippoint(self, minlink):
801 803 return storageutil.resolvestripinfo(minlink, len(self) - 1,
802 804 [self.rev(n) for n in self.heads()],
803 805 self.linkrev,
804 806 self.parentrevs)
805 807
806 808 def strip(self, minlink, transaction):
807 809 if not len(self):
808 810 return
809 811
810 812 rev, _ignored = self.getstrippoint(minlink)
811 813
812 814 if rev == len(self):
813 815 return
814 816
815 817 for rev in self.revs(rev):
816 818 self._db.execute(
817 819 r'DELETE FROM fileindex WHERE pathid=? AND node=?',
818 820 (self._pathid, self.node(rev)))
819 821
820 822 # TODO how should we garbage collect data in delta table?
821 823
822 824 self._refreshindex()
823 825
824 826 # End of ifilemutation interface.
825 827
826 828 # Start of ifilestorage interface.
827 829
828 830 def files(self):
829 831 return []
830 832
831 833 def storageinfo(self, exclusivefiles=False, sharedfiles=False,
832 834 revisionscount=False, trackedsize=False,
833 835 storedsize=False):
834 836 d = {}
835 837
836 838 if exclusivefiles:
837 839 d['exclusivefiles'] = []
838 840
839 841 if sharedfiles:
840 842 # TODO list sqlite file(s) here.
841 843 d['sharedfiles'] = []
842 844
843 845 if revisionscount:
844 846 d['revisionscount'] = len(self)
845 847
846 848 if trackedsize:
847 849 d['trackedsize'] = sum(len(self.revision(node))
848 850 for node in self._nodetorev)
849 851
850 852 if storedsize:
851 853 # TODO implement this?
852 854 d['storedsize'] = None
853 855
854 856 return d
855 857
856 858 def verifyintegrity(self, state):
857 859 state['skipread'] = set()
858 860
859 861 for rev in self:
860 862 node = self.node(rev)
861 863
862 864 try:
863 865 self.revision(node)
864 866 except Exception as e:
865 867 yield sqliteproblem(
866 868 error=_('unpacking %s: %s') % (short(node), e),
867 869 node=node)
868 870
869 871 state['skipread'].add(node)
870 872
871 873 # End of ifilestorage interface.
872 874
873 875 def _checkhash(self, fulltext, node, p1=None, p2=None):
874 876 if p1 is None and p2 is None:
875 877 p1, p2 = self.parents(node)
876 878
877 879 if node == storageutil.hashrevisionsha1(fulltext, p1, p2):
878 880 return
879 881
880 882 try:
881 883 del self._revisioncache[node]
882 884 except KeyError:
883 885 pass
884 886
885 887 if storageutil.iscensoredtext(fulltext):
886 888 raise error.CensoredNodeError(self._path, node, fulltext)
887 889
888 890 raise SQLiteStoreError(_('integrity check failed on %s') %
889 891 self._path)
890 892
891 893 def _addrawrevision(self, node, revisiondata, transaction, linkrev,
892 894 p1, p2, storedelta=None, flags=0):
893 895 if self._pathid is None:
894 896 res = self._db.execute(
895 897 r'INSERT INTO filepath (path) VALUES (?)', (self._path,))
896 898 self._pathid = res.lastrowid
897 899
898 900 # For simplicity, always store a delta against p1.
899 901 # TODO we need a lot more logic here to make behavior reasonable.
900 902
901 903 if storedelta:
902 904 deltabase, delta = storedelta
903 905
904 906 if isinstance(deltabase, int):
905 907 deltabase = self.node(deltabase)
906 908
907 909 else:
908 910 assert revisiondata is not None
909 911 deltabase = p1
910 912
911 913 if deltabase == nullid:
912 914 delta = revisiondata
913 915 else:
914 916 delta = mdiff.textdiff(self.revision(self.rev(deltabase)),
915 917 revisiondata)
916 918
917 919 # File index stores a pointer to its delta and the parent delta.
918 920 # The parent delta is stored via a pointer to the fileindex PK.
919 921 if deltabase == nullid:
920 922 baseid = None
921 923 else:
922 924 baseid = self._revisions[deltabase].rid
923 925
924 926 # Deltas are stored with a hash of their content. This allows
925 927 # us to de-duplicate. The table is configured to ignore conflicts
926 928 # and it is faster to just insert and silently noop than to look
927 929 # first.
928 930 deltahash = hashlib.sha1(delta).digest()
929 931
930 932 if self._compengine == 'zstd':
931 933 deltablob = self._cctx.compress(delta)
932 934 compression = COMPRESSION_ZSTD
933 935 elif self._compengine == 'zlib':
934 936 deltablob = zlib.compress(delta)
935 937 compression = COMPRESSION_ZLIB
936 938 elif self._compengine == 'none':
937 939 deltablob = delta
938 940 compression = COMPRESSION_NONE
939 941 else:
940 942 raise error.ProgrammingError('unhandled compression engine: %s' %
941 943 self._compengine)
942 944
943 945 # Don't store compressed data if it isn't practical.
944 946 if len(deltablob) >= len(delta):
945 947 deltablob = delta
946 948 compression = COMPRESSION_NONE
947 949
948 950 deltaid = insertdelta(self._db, compression, deltahash, deltablob)
949 951
950 952 rev = len(self)
951 953
952 954 if p1 == nullid:
953 955 p1rev = nullrev
954 956 else:
955 957 p1rev = self._nodetorev[p1]
956 958
957 959 if p2 == nullid:
958 960 p2rev = nullrev
959 961 else:
960 962 p2rev = self._nodetorev[p2]
961 963
962 964 rid = self._db.execute(
963 965 r'INSERT INTO fileindex ('
964 966 r' pathid, revnum, node, p1rev, p2rev, linkrev, flags, '
965 967 r' deltaid, deltabaseid) '
966 968 r' VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)',
967 969 (self._pathid, rev, node, p1rev, p2rev, linkrev, flags,
968 970 deltaid, baseid)
969 971 ).lastrowid
970 972
971 973 entry = revisionentry(
972 974 rid=rid,
973 975 rev=rev,
974 976 node=node,
975 977 p1rev=p1rev,
976 978 p2rev=p2rev,
977 979 p1node=p1,
978 980 p2node=p2,
979 981 linkrev=linkrev,
980 982 flags=flags)
981 983
982 984 self._nodetorev[node] = rev
983 985 self._revtonode[rev] = node
984 986 self._revisions[node] = entry
985 987
986 988 return node
987 989
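The delta de-duplication described in _addrawrevision() relies on insertdelta(), defined earlier in the file (not shown here), treating the content hash as the identity of a delta row. A rough sketch of that insert-or-reuse pattern, under the assumption of a 'delta' table with a unique content hash; it is not the extension's actual schema or helper:

def insertdelta_sketch(db, compression, deltahash, blob):
    # The INSERT is a silent no-op when an identical delta is already stored.
    db.execute(
        r'INSERT OR IGNORE INTO delta (compression, hash, delta) '
        r'VALUES (?, ?, ?)',
        (compression, deltahash, blob))
    # Either way, hand back the row id of the (possibly pre-existing) delta.
    return db.execute(
        r'SELECT id FROM delta WHERE hash=?',
        (deltahash,)).fetchone()[0]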
988 990 class sqliterepository(localrepo.localrepository):
989 991 def cancopy(self):
990 992 return False
991 993
992 994 def transaction(self, *args, **kwargs):
993 995 current = self.currenttransaction()
994 996
995 997 tr = super(sqliterepository, self).transaction(*args, **kwargs)
996 998
997 999 if current:
998 1000 return tr
999 1001
1000 1002 self._dbconn.execute(r'BEGIN TRANSACTION')
1001 1003
1002 1004 def committransaction(_):
1003 1005 self._dbconn.commit()
1004 1006
1005 1007 tr.addfinalize('sqlitestore', committransaction)
1006 1008
1007 1009 return tr
1008 1010
1009 1011 @property
1010 1012 def _dbconn(self):
1011 1013 # SQLite connections can only be used on the thread that created
1012 1014 # them. In most cases, this "just works." However, hgweb uses
1013 1015 # multiple threads.
1014 1016 tid = threading.current_thread().ident
1015 1017
1016 1018 if self._db:
1017 1019 if self._db[0] == tid:
1018 1020 return self._db[1]
1019 1021
1020 1022 db = makedb(self.svfs.join('db.sqlite'))
1021 1023 self._db = (tid, db)
1022 1024
1023 1025 return db
1024 1026
1025 1027 def makedb(path):
1026 1028 """Construct a database handle for a database at path."""
1027 1029
1028 1030 db = sqlite3.connect(encoding.strfromlocal(path))
1029 1031 db.text_factory = bytes
1030 1032
1031 1033 res = db.execute(r'PRAGMA user_version').fetchone()[0]
1032 1034
1033 1035 # New database.
1034 1036 if res == 0:
1035 1037 for statement in CREATE_SCHEMA:
1036 1038 db.execute(statement)
1037 1039
1038 1040 db.commit()
1039 1041
1040 1042 elif res == CURRENT_SCHEMA_VERSION:
1041 1043 pass
1042 1044
1043 1045 else:
1044 1046 raise error.Abort(_('sqlite database has unrecognized version'))
1045 1047
1046 1048 db.execute(r'PRAGMA journal_mode=WAL')
1047 1049
1048 1050 return db
1049 1051
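makedb() keys everything off SQLite's user_version pragma: 0 means a freshly created database that still needs the schema, CURRENT_SCHEMA_VERSION means the store is up to date, and anything else is refused. A small self-contained demonstration of that pragma (the table and version number are illustrative, not the extension's real schema):

import sqlite3

db = sqlite3.connect(':memory:')
# A brand new database always reports user_version == 0.
assert db.execute('PRAGMA user_version').fetchone()[0] == 0
db.execute('CREATE TABLE example (id INTEGER PRIMARY KEY)')
# Stamp the schema version so later opens can detect it.
db.execute('PRAGMA user_version=1')
assert db.execute('PRAGMA user_version').fetchone()[0] == 1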
1050 1052 def featuresetup(ui, supported):
1051 1053 supported.add(REQUIREMENT)
1052 1054
1053 1055 if zstd:
1054 1056 supported.add(REQUIREMENT_ZSTD)
1055 1057
1056 1058 supported.add(REQUIREMENT_ZLIB)
1057 1059 supported.add(REQUIREMENT_NONE)
1058 1060 supported.add(REQUIREMENT_SHALLOW_FILES)
1059 1061 supported.add(repository.NARROW_REQUIREMENT)
1060 1062
1061 1063 def newreporequirements(orig, ui, createopts):
1062 1064 if createopts['backend'] != 'sqlite':
1063 1065 return orig(ui, createopts)
1064 1066
1065 1067 # This restriction can be lifted once we have more confidence.
1066 1068 if 'sharedrepo' in createopts:
1067 1069 raise error.Abort(_('shared repositories not supported with SQLite '
1068 1070 'store'))
1069 1071
1070 1072 # This filtering is out of an abundance of caution: we want to ensure
1071 1073 # we honor creation options and we do that by annotating exactly the
1072 1074 # creation options we recognize.
1073 1075 known = {
1074 1076 'narrowfiles',
1075 1077 'backend',
1076 1078 'shallowfilestore',
1077 1079 }
1078 1080
1079 1081 unsupported = set(createopts) - known
1080 1082 if unsupported:
1081 1083 raise error.Abort(_('SQLite store does not support repo creation '
1082 1084 'option: %s') % ', '.join(sorted(unsupported)))
1083 1085
1084 1086 # Since we're a hybrid store that still relies on revlogs, we fall back
1085 1087 # to using the revlogv1 backend's storage requirements then adding our
1086 1088 # own requirement.
1087 1089 createopts['backend'] = 'revlogv1'
1088 1090 requirements = orig(ui, createopts)
1089 1091 requirements.add(REQUIREMENT)
1090 1092
1091 1093 compression = ui.config('storage', 'sqlite.compression')
1092 1094
1093 1095 if compression == 'zstd' and not zstd:
1094 1096 raise error.Abort(_('storage.sqlite.compression set to "zstd" but '
1095 1097 'zstandard compression not available to this '
1096 1098 'Mercurial install'))
1097 1099
1098 1100 if compression == 'zstd':
1099 1101 requirements.add(REQUIREMENT_ZSTD)
1100 1102 elif compression == 'zlib':
1101 1103 requirements.add(REQUIREMENT_ZLIB)
1102 1104 elif compression == 'none':
1103 1105 requirements.add(REQUIREMENT_NONE)
1104 1106 else:
1105 1107 raise error.Abort(_('unknown compression engine defined in '
1106 1108 'storage.sqlite.compression: %s') % compression)
1107 1109
1108 1110 if createopts.get('shallowfilestore'):
1109 1111 requirements.add(REQUIREMENT_SHALLOW_FILES)
1110 1112
1111 1113 return requirements
1112 1114
1113 1115 @interfaceutil.implementer(repository.ilocalrepositoryfilestorage)
1114 1116 class sqlitefilestorage(object):
1115 1117 """Repository file storage backed by SQLite."""
1116 1118 def file(self, path):
1117 1119 if path[0] == b'/':
1118 1120 path = path[1:]
1119 1121
1120 1122 if REQUIREMENT_ZSTD in self.requirements:
1121 1123 compression = 'zstd'
1122 1124 elif REQUIREMENT_ZLIB in self.requirements:
1123 1125 compression = 'zlib'
1124 1126 elif REQUIREMENT_NONE in self.requirements:
1125 1127 compression = 'none'
1126 1128 else:
1127 1129 raise error.Abort(_('unable to determine what compression engine '
1128 1130 'to use for SQLite storage'))
1129 1131
1130 1132 return sqlitefilestore(self._dbconn, path, compression)
1131 1133
1132 1134 def makefilestorage(orig, requirements, features, **kwargs):
1133 1135 """Produce a type conforming to ``ilocalrepositoryfilestorage``."""
1134 1136 if REQUIREMENT in requirements:
1135 1137 if REQUIREMENT_SHALLOW_FILES in requirements:
1136 1138 features.add(repository.REPO_FEATURE_SHALLOW_FILE_STORAGE)
1137 1139
1138 1140 return sqlitefilestorage
1139 1141 else:
1140 1142 return orig(requirements=requirements, features=features, **kwargs)
1141 1143
1142 1144 def makemain(orig, ui, requirements, **kwargs):
1143 1145 if REQUIREMENT in requirements:
1144 1146 if REQUIREMENT_ZSTD in requirements and not zstd:
1145 1147 raise error.Abort(_('repository uses zstandard compression, which '
1146 1148 'is not available to this Mercurial install'))
1147 1149
1148 1150 return sqliterepository
1149 1151
1150 1152 return orig(requirements=requirements, **kwargs)
1151 1153
1152 1154 def verifierinit(orig, self, *args, **kwargs):
1153 1155 orig(self, *args, **kwargs)
1154 1156
1155 1157 # We don't care that files in the store don't align with what is
1156 1158 # advertised. So suppress these warnings.
1157 1159 self.warnorphanstorefiles = False
1158 1160
1159 1161 def extsetup(ui):
1160 1162 localrepo.featuresetupfuncs.add(featuresetup)
1161 1163 extensions.wrapfunction(localrepo, 'newreporequirements',
1162 1164 newreporequirements)
1163 1165 extensions.wrapfunction(localrepo, 'makefilestorage',
1164 1166 makefilestorage)
1165 1167 extensions.wrapfunction(localrepo, 'makemain',
1166 1168 makemain)
1167 1169 extensions.wrapfunction(verify.verifier, '__init__',
1168 1170 verifierinit)
1169 1171
1170 1172 def reposetup(ui, repo):
1171 1173 if isinstance(repo, sqliterepository):
1172 1174 repo._db = None
1173 1175
1174 1176 # TODO check for bundlerepository?
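Taken together, extsetup() wires the SQLite backend into repository creation and reposetup() primes the per-instance connection slot. As a hedged illustration of how a repository using this store might be created programmatically — assuming hg.repository() accepts a createopts mapping, which is how the 'backend' creation option checked by newreporequirements() gets populated; the path and config values are examples only:

from mercurial import hg, ui as uimod

ui = uimod.ui.load()
# storage.sqlite.compression is the knob read by newreporequirements() above.
ui.setconfig('storage', 'sqlite.compression', 'zlib', 'example')
repo = hg.repository(ui, '/tmp/example-repo', create=True,
                     createopts={'backend': 'sqlite'})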
@@ -1,1423 +1,1426
1 1 # changegroup.py - Mercurial changegroup manipulation functions
2 2 #
3 3 # Copyright 2006 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import os
11 11 import struct
12 12 import weakref
13 13
14 14 from .i18n import _
15 15 from .node import (
16 16 hex,
17 17 nullid,
18 18 nullrev,
19 19 short,
20 20 )
21 21
22 22 from . import (
23 23 error,
24 24 match as matchmod,
25 25 mdiff,
26 26 phases,
27 27 pycompat,
28 util,
29 )
30
31 from .interfaces import (
28 32 repository,
29 util,
30 33 )
31 34
32 35 _CHANGEGROUPV1_DELTA_HEADER = struct.Struct("20s20s20s20s")
33 36 _CHANGEGROUPV2_DELTA_HEADER = struct.Struct("20s20s20s20s20s")
34 37 _CHANGEGROUPV3_DELTA_HEADER = struct.Struct(">20s20s20s20s20sH")
35 38
36 39 LFS_REQUIREMENT = 'lfs'
37 40
38 41 readexactly = util.readexactly
39 42
40 43 def getchunk(stream):
41 44 """return the next chunk from stream as a string"""
42 45 d = readexactly(stream, 4)
43 46 l = struct.unpack(">l", d)[0]
44 47 if l <= 4:
45 48 if l:
46 49 raise error.Abort(_("invalid chunk length %d") % l)
47 50 return ""
48 51 return readexactly(stream, l - 4)
49 52
50 53 def chunkheader(length):
51 54 """return a changegroup chunk header (string)"""
52 55 return struct.pack(">l", length + 4)
53 56
54 57 def closechunk():
55 58 """return a changegroup chunk header (string) for a zero-length chunk"""
56 59 return struct.pack(">l", 0)
57 60
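The framing used throughout the changegroup format is just a 4-byte big-endian length (counting the length field itself) followed by the payload, with a zero-length chunk acting as a terminator. Concretely:

# A 3-byte payload is announced as 3 + 4 == 7; an empty chunk closes a section.
assert chunkheader(3) == b'\x00\x00\x00\x07'
assert closechunk() == b'\x00\x00\x00\x00'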
58 61 def _fileheader(path):
59 62 """Obtain a changegroup chunk header for a named path."""
60 63 return chunkheader(len(path)) + path
61 64
62 65 def writechunks(ui, chunks, filename, vfs=None):
63 66 """Write chunks to a file and return its filename.
64 67
65 68 The stream is assumed to be a bundle file.
66 69 Existing files will not be overwritten.
67 70 If no filename is specified, a temporary file is created.
68 71 """
69 72 fh = None
70 73 cleanup = None
71 74 try:
72 75 if filename:
73 76 if vfs:
74 77 fh = vfs.open(filename, "wb")
75 78 else:
76 79 # Increase default buffer size because default is usually
77 80 # small (4k is common on Linux).
78 81 fh = open(filename, "wb", 131072)
79 82 else:
80 83 fd, filename = pycompat.mkstemp(prefix="hg-bundle-", suffix=".hg")
81 84 fh = os.fdopen(fd, r"wb")
82 85 cleanup = filename
83 86 for c in chunks:
84 87 fh.write(c)
85 88 cleanup = None
86 89 return filename
87 90 finally:
88 91 if fh is not None:
89 92 fh.close()
90 93 if cleanup is not None:
91 94 if filename and vfs:
92 95 vfs.unlink(cleanup)
93 96 else:
94 97 os.unlink(cleanup)
95 98
96 99 class cg1unpacker(object):
97 100 """Unpacker for cg1 changegroup streams.
98 101
99 102 A changegroup unpacker handles the framing of the revision data in
100 103 the wire format. Most consumers will want to use the apply()
101 104 method to add the changes from the changegroup to a repository.
102 105
103 106 If you're forwarding a changegroup unmodified to another consumer,
104 107 use getchunks(), which returns an iterator of changegroup
105 108 chunks. This is mostly useful for cases where you need to know the
106 109 data stream has ended by observing the end of the changegroup.
107 110
108 111 deltachunk() is useful only if you're applying delta data. Most
109 112 consumers should prefer apply() instead.
110 113
111 114 A few other public methods exist. Those are used only for
112 115 bundlerepo and some debug commands - their use is discouraged.
113 116 """
114 117 deltaheader = _CHANGEGROUPV1_DELTA_HEADER
115 118 deltaheadersize = deltaheader.size
116 119 version = '01'
117 120 _grouplistcount = 1 # One list of files after the manifests
118 121
119 122 def __init__(self, fh, alg, extras=None):
120 123 if alg is None:
121 124 alg = 'UN'
122 125 if alg not in util.compengines.supportedbundletypes:
123 126 raise error.Abort(_('unknown stream compression type: %s')
124 127 % alg)
125 128 if alg == 'BZ':
126 129 alg = '_truncatedBZ'
127 130
128 131 compengine = util.compengines.forbundletype(alg)
129 132 self._stream = compengine.decompressorreader(fh)
130 133 self._type = alg
131 134 self.extras = extras or {}
132 135 self.callback = None
133 136
134 137 # These methods (compressed, read, seek, tell) all appear to only
135 138 # be used by bundlerepo, but it's a little hard to tell.
136 139 def compressed(self):
137 140 return self._type is not None and self._type != 'UN'
138 141 def read(self, l):
139 142 return self._stream.read(l)
140 143 def seek(self, pos):
141 144 return self._stream.seek(pos)
142 145 def tell(self):
143 146 return self._stream.tell()
144 147 def close(self):
145 148 return self._stream.close()
146 149
147 150 def _chunklength(self):
148 151 d = readexactly(self._stream, 4)
149 152 l = struct.unpack(">l", d)[0]
150 153 if l <= 4:
151 154 if l:
152 155 raise error.Abort(_("invalid chunk length %d") % l)
153 156 return 0
154 157 if self.callback:
155 158 self.callback()
156 159 return l - 4
157 160
158 161 def changelogheader(self):
159 162 """v10 does not have a changelog header chunk"""
160 163 return {}
161 164
162 165 def manifestheader(self):
163 166 """v10 does not have a manifest header chunk"""
164 167 return {}
165 168
166 169 def filelogheader(self):
167 170 """return the header of the filelogs chunk, v10 only has the filename"""
168 171 l = self._chunklength()
169 172 if not l:
170 173 return {}
171 174 fname = readexactly(self._stream, l)
172 175 return {'filename': fname}
173 176
174 177 def _deltaheader(self, headertuple, prevnode):
175 178 node, p1, p2, cs = headertuple
176 179 if prevnode is None:
177 180 deltabase = p1
178 181 else:
179 182 deltabase = prevnode
180 183 flags = 0
181 184 return node, p1, p2, deltabase, cs, flags
182 185
183 186 def deltachunk(self, prevnode):
184 187 l = self._chunklength()
185 188 if not l:
186 189 return {}
187 190 headerdata = readexactly(self._stream, self.deltaheadersize)
188 191 header = self.deltaheader.unpack(headerdata)
189 192 delta = readexactly(self._stream, l - self.deltaheadersize)
190 193 node, p1, p2, deltabase, cs, flags = self._deltaheader(header, prevnode)
191 194 return (node, p1, p2, cs, deltabase, delta, flags)
192 195
193 196 def getchunks(self):
194 197 """returns all the chunks contains in the bundle
195 198
196 199 Used when you need to forward the binary stream to a file or another
197 200 network API. To do so, it parses the changegroup data; otherwise it would
198 201 block in the sshrepo case because it doesn't know the end of the stream.
199 202 """
200 203 # For changegroup 1 and 2, we expect 3 parts: changelog, manifestlog,
201 204 # and a list of filelogs. For changegroup 3, we expect 4 parts:
202 205 # changelog, manifestlog, a list of tree manifestlogs, and a list of
203 206 # filelogs.
204 207 #
205 208 # Changelog and manifestlog parts are terminated with empty chunks. The
206 209 # tree and file parts are a list of entry sections. Each entry section
207 210 # is a series of chunks terminating in an empty chunk. The list of these
208 211 # entry sections is terminated in yet another empty chunk, so we know
209 212 # we've reached the end of the tree/file list when we reach an empty
210 213 # chunk that was preceded by no non-empty chunks.
211 214
212 215 parts = 0
213 216 while parts < 2 + self._grouplistcount:
214 217 noentries = True
215 218 while True:
216 219 chunk = getchunk(self)
217 220 if not chunk:
218 221 # The first two empty chunks represent the end of the
219 222 # changelog and the manifestlog portions. The remaining
220 223 # empty chunks represent either A) the end of individual
221 224 # tree or file entries in the file list, or B) the end of
222 225 # the entire list. It's the end of the entire list if there
223 226 # were no entries (i.e. noentries is True).
224 227 if parts < 2:
225 228 parts += 1
226 229 elif noentries:
227 230 parts += 1
228 231 break
229 232 noentries = False
230 233 yield chunkheader(len(chunk))
231 234 pos = 0
232 235 while pos < len(chunk):
233 236 next = pos + 2**20
234 237 yield chunk[pos:next]
235 238 pos = next
236 239 yield closechunk()
237 240
238 241 def _unpackmanifests(self, repo, revmap, trp, prog):
239 242 self.callback = prog.increment
240 243 # no need to check for empty manifest group here:
241 244 # if the result of the merge of 1 and 2 is the same in 3 and 4,
242 245 # no new manifest will be created and the manifest group will
243 246 # be empty during the pull
244 247 self.manifestheader()
245 248 deltas = self.deltaiter()
246 249 repo.manifestlog.getstorage(b'').addgroup(deltas, revmap, trp)
247 250 prog.complete()
248 251 self.callback = None
249 252
250 253 def apply(self, repo, tr, srctype, url, targetphase=phases.draft,
251 254 expectedtotal=None):
252 255 """Add the changegroup returned by source.read() to this repo.
253 256 srctype is a string like 'push', 'pull', or 'unbundle'. url is
254 257 the URL of the repo where this changegroup is coming from.
255 258
256 259 Return an integer summarizing the change to this repo:
257 260 - nothing changed or no source: 0
258 261 - more heads than before: 1+added heads (2..n)
259 262 - fewer heads than before: -1-removed heads (-2..-n)
260 263 - number of heads stays the same: 1
261 264 """
262 265 repo = repo.unfiltered()
263 266 def csmap(x):
264 267 repo.ui.debug("add changeset %s\n" % short(x))
265 268 return len(cl)
266 269
267 270 def revmap(x):
268 271 return cl.rev(x)
269 272
270 273 changesets = files = revisions = 0
271 274
272 275 try:
273 276 # The transaction may already carry source information. In this
274 277 # case we use the top level data. We overwrite the argument
275 278 # because we need to use the top level value (if they exist)
276 279 # in this function.
277 280 srctype = tr.hookargs.setdefault('source', srctype)
278 281 tr.hookargs.setdefault('url', url)
279 282 repo.hook('prechangegroup',
280 283 throw=True, **pycompat.strkwargs(tr.hookargs))
281 284
282 285 # write changelog data to temp files so concurrent readers
283 286 # will not see an inconsistent view
284 287 cl = repo.changelog
285 288 cl.delayupdate(tr)
286 289 oldheads = set(cl.heads())
287 290
288 291 trp = weakref.proxy(tr)
289 292 # pull off the changeset group
290 293 repo.ui.status(_("adding changesets\n"))
291 294 clstart = len(cl)
292 295 progress = repo.ui.makeprogress(_('changesets'), unit=_('chunks'),
293 296 total=expectedtotal)
294 297 self.callback = progress.increment
295 298
296 299 efiles = set()
297 300 def onchangelog(cl, node):
298 301 efiles.update(cl.readfiles(node))
299 302
300 303 self.changelogheader()
301 304 deltas = self.deltaiter()
302 305 cgnodes = cl.addgroup(deltas, csmap, trp, addrevisioncb=onchangelog)
303 306 efiles = len(efiles)
304 307
305 308 if not cgnodes:
306 309 repo.ui.develwarn('applied empty changelog from changegroup',
307 310 config='warn-empty-changegroup')
308 311 clend = len(cl)
309 312 changesets = clend - clstart
310 313 progress.complete()
311 314 self.callback = None
312 315
313 316 # pull off the manifest group
314 317 repo.ui.status(_("adding manifests\n"))
315 318 # We know that we'll never have more manifests than we had
316 319 # changesets.
317 320 progress = repo.ui.makeprogress(_('manifests'), unit=_('chunks'),
318 321 total=changesets)
319 322 self._unpackmanifests(repo, revmap, trp, progress)
320 323
321 324 needfiles = {}
322 325 if repo.ui.configbool('server', 'validate'):
323 326 cl = repo.changelog
324 327 ml = repo.manifestlog
325 328 # validate incoming csets have their manifests
326 329 for cset in pycompat.xrange(clstart, clend):
327 330 mfnode = cl.changelogrevision(cset).manifest
328 331 mfest = ml[mfnode].readdelta()
329 332 # store file cgnodes we must see
330 333 for f, n in mfest.iteritems():
331 334 needfiles.setdefault(f, set()).add(n)
332 335
333 336 # process the files
334 337 repo.ui.status(_("adding file changes\n"))
335 338 newrevs, newfiles = _addchangegroupfiles(
336 339 repo, self, revmap, trp, efiles, needfiles)
337 340 revisions += newrevs
338 341 files += newfiles
339 342
340 343 deltaheads = 0
341 344 if oldheads:
342 345 heads = cl.heads()
343 346 deltaheads = len(heads) - len(oldheads)
344 347 for h in heads:
345 348 if h not in oldheads and repo[h].closesbranch():
346 349 deltaheads -= 1
347 350 htext = ""
348 351 if deltaheads:
349 352 htext = _(" (%+d heads)") % deltaheads
350 353
351 354 repo.ui.status(_("added %d changesets"
352 355 " with %d changes to %d files%s\n")
353 356 % (changesets, revisions, files, htext))
354 357 repo.invalidatevolatilesets()
355 358
356 359 if changesets > 0:
357 360 if 'node' not in tr.hookargs:
358 361 tr.hookargs['node'] = hex(cl.node(clstart))
359 362 tr.hookargs['node_last'] = hex(cl.node(clend - 1))
360 363 hookargs = dict(tr.hookargs)
361 364 else:
362 365 hookargs = dict(tr.hookargs)
363 366 hookargs['node'] = hex(cl.node(clstart))
364 367 hookargs['node_last'] = hex(cl.node(clend - 1))
365 368 repo.hook('pretxnchangegroup',
366 369 throw=True, **pycompat.strkwargs(hookargs))
367 370
368 371 added = [cl.node(r) for r in pycompat.xrange(clstart, clend)]
369 372 phaseall = None
370 373 if srctype in ('push', 'serve'):
371 374 # Old servers can not push the boundary themselves.
372 375 # New servers won't push the boundary if changeset already
373 376 # exists locally as secret
374 377 #
375 378 # We should not use added here but the list of all change in
376 379 # the bundle
377 380 if repo.publishing():
378 381 targetphase = phaseall = phases.public
379 382 else:
380 383 # closer target phase computation
381 384
382 385 # Those changesets have been pushed from the
383 386 # outside, their phases are going to be pushed
384 387 # alongside. Therefore `targetphase` is
385 388 # ignored.
386 389 targetphase = phaseall = phases.draft
387 390 if added:
388 391 phases.registernew(repo, tr, targetphase, added)
389 392 if phaseall is not None:
390 393 phases.advanceboundary(repo, tr, phaseall, cgnodes)
391 394
392 395 if changesets > 0:
393 396
394 397 def runhooks():
395 398 # These hooks run when the lock releases, not when the
396 399 # transaction closes. So it's possible for the changelog
397 400 # to have changed since we last saw it.
398 401 if clstart >= len(repo):
399 402 return
400 403
401 404 repo.hook("changegroup", **pycompat.strkwargs(hookargs))
402 405
403 406 for n in added:
404 407 args = hookargs.copy()
405 408 args['node'] = hex(n)
406 409 del args['node_last']
407 410 repo.hook("incoming", **pycompat.strkwargs(args))
408 411
409 412 newheads = [h for h in repo.heads()
410 413 if h not in oldheads]
411 414 repo.ui.log("incoming",
412 415 "%d incoming changes - new heads: %s\n",
413 416 len(added),
414 417 ', '.join([hex(c[:6]) for c in newheads]))
415 418
416 419 tr.addpostclose('changegroup-runhooks-%020i' % clstart,
417 420 lambda tr: repo._afterlock(runhooks))
418 421 finally:
419 422 repo.ui.flush()
420 423 # never return 0 here:
421 424 if deltaheads < 0:
422 425 ret = deltaheads - 1
423 426 else:
424 427 ret = deltaheads + 1
425 428 return ret
426 429
427 430 def deltaiter(self):
428 431 """
429 432 returns an iterator of the deltas in this changegroup
430 433
431 434 Useful for passing to the underlying storage system to be stored.
432 435 """
433 436 chain = None
434 437 for chunkdata in iter(lambda: self.deltachunk(chain), {}):
435 438 # Chunkdata: (node, p1, p2, cs, deltabase, delta, flags)
436 439 yield chunkdata
437 440 chain = chunkdata[0]
438 441
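As the cg1unpacker docstring notes, most consumers only ever touch apply(). A hedged sketch of the typical call pattern (bundle2 framing, locking and error handling are omitted, and 'fh' is assumed to be a raw, uncompressed cg1 stream such as the payload of a v1 bundle):

cg = cg1unpacker(fh, 'UN')
with repo.transaction('unbundle') as tr:
    ret = cg.apply(repo, tr, 'unbundle', 'bundle:example')
# ret encodes the change in head count, as described in apply()'s docstring.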
439 442 class cg2unpacker(cg1unpacker):
440 443 """Unpacker for cg2 streams.
441 444
442 445 cg2 streams add support for generaldelta, so the delta header
443 446 format is slightly different. All other features about the data
444 447 remain the same.
445 448 """
446 449 deltaheader = _CHANGEGROUPV2_DELTA_HEADER
447 450 deltaheadersize = deltaheader.size
448 451 version = '02'
449 452
450 453 def _deltaheader(self, headertuple, prevnode):
451 454 node, p1, p2, deltabase, cs = headertuple
452 455 flags = 0
453 456 return node, p1, p2, deltabase, cs, flags
454 457
455 458 class cg3unpacker(cg2unpacker):
456 459 """Unpacker for cg3 streams.
457 460
458 461 cg3 streams add support for exchanging treemanifests and revlog
459 462 flags. It adds the revlog flags to the delta header and an empty chunk
460 463 separating manifests and files.
461 464 """
462 465 deltaheader = _CHANGEGROUPV3_DELTA_HEADER
463 466 deltaheadersize = deltaheader.size
464 467 version = '03'
465 468 _grouplistcount = 2 # One list of manifests and one list of files
466 469
467 470 def _deltaheader(self, headertuple, prevnode):
468 471 node, p1, p2, deltabase, cs, flags = headertuple
469 472 return node, p1, p2, deltabase, cs, flags
470 473
471 474 def _unpackmanifests(self, repo, revmap, trp, prog):
472 475 super(cg3unpacker, self)._unpackmanifests(repo, revmap, trp, prog)
473 476 for chunkdata in iter(self.filelogheader, {}):
474 477 # If we get here, there are directory manifests in the changegroup
475 478 d = chunkdata["filename"]
476 479 repo.ui.debug("adding %s revisions\n" % d)
477 480 deltas = self.deltaiter()
478 481 if not repo.manifestlog.getstorage(d).addgroup(deltas, revmap, trp):
479 482 raise error.Abort(_("received dir revlog group is empty"))
480 483
481 484 class headerlessfixup(object):
482 485 def __init__(self, fh, h):
483 486 self._h = h
484 487 self._fh = fh
485 488 def read(self, n):
486 489 if self._h:
487 490 d, self._h = self._h[:n], self._h[n:]
488 491 if len(d) < n:
489 492 d += readexactly(self._fh, n - len(d))
490 493 return d
491 494 return readexactly(self._fh, n)
492 495
493 496 def _revisiondeltatochunks(delta, headerfn):
494 497 """Serialize a revisiondelta to changegroup chunks."""
495 498
496 499 # The captured revision delta may be encoded as a delta against
497 500 # a base revision or as a full revision. The changegroup format
498 501 # requires that everything on the wire be deltas. So for full
499 502 # revisions, we need to invent a header that says to rewrite
500 503 # data.
501 504
502 505 if delta.delta is not None:
503 506 prefix, data = b'', delta.delta
504 507 elif delta.basenode == nullid:
505 508 data = delta.revision
506 509 prefix = mdiff.trivialdiffheader(len(data))
507 510 else:
508 511 data = delta.revision
509 512 prefix = mdiff.replacediffheader(delta.baserevisionsize,
510 513 len(data))
511 514
512 515 meta = headerfn(delta)
513 516
514 517 yield chunkheader(len(meta) + len(prefix) + len(data))
515 518 yield meta
516 519 if prefix:
517 520 yield prefix
518 521 yield data
519 522
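The "header that says to rewrite data" mentioned above is an ordinary bdiff-style hunk header: a struct-packed (start, end, new length) triple. mdiff.replacediffheader(oldlen, newlen) describes a hunk replacing the whole base, and mdiff.trivialdiffheader(len) describes an insertion against an empty base:

# Both helpers pack '>lll' (start, end, new length); the values are examples.
assert mdiff.replacediffheader(10, 4) == struct.pack('>lll', 0, 10, 4)
assert mdiff.trivialdiffheader(4) == struct.pack('>lll', 0, 0, 4)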
520 523 def _sortnodesellipsis(store, nodes, cl, lookup):
521 524 """Sort nodes for changegroup generation."""
522 525 # Ellipses serving mode.
523 526 #
524 527 # In a perfect world, we'd generate better ellipsis-ified graphs
525 528 # for non-changelog revlogs. In practice, we haven't started doing
526 529 # that yet, so the resulting DAGs for the manifestlog and filelogs
527 530 # are actually full of bogus parentage on all the ellipsis
528 531 # nodes. This has the side effect that, while the contents are
529 532 # correct, the individual DAGs might be completely out of whack in
530 533 # a case like 882681bc3166 and its ancestors (back about 10
531 534 # revisions or so) in the main hg repo.
532 535 #
533 536 # The one invariant we *know* holds is that the new (potentially
534 537 # bogus) DAG shape will be valid if we order the nodes in the
535 538 # order that they're introduced in dramatis personae by the
536 539 # changelog, so what we do is we sort the non-changelog histories
537 540 # by the order in which they are used by the changelog.
538 541 key = lambda n: cl.rev(lookup(n))
539 542 return sorted(nodes, key=key)
540 543
541 544 def _resolvenarrowrevisioninfo(cl, store, ischangelog, rev, linkrev,
542 545 linknode, clrevtolocalrev, fullclnodes,
543 546 precomputedellipsis):
544 547 linkparents = precomputedellipsis[linkrev]
545 548 def local(clrev):
546 549 """Turn a changelog revnum into a local revnum.
547 550
548 551 The ellipsis dag is stored as revnums on the changelog,
549 552 but when we're producing ellipsis entries for
550 553 non-changelog revlogs, we need to turn those numbers into
551 554 something local. This does that for us, and during the
552 555 changelog sending phase will also expand the stored
553 556 mappings as needed.
554 557 """
555 558 if clrev == nullrev:
556 559 return nullrev
557 560
558 561 if ischangelog:
559 562 return clrev
560 563
561 564 # Walk the ellipsis-ized changelog breadth-first looking for a
562 565 # change that has been linked from the current revlog.
563 566 #
564 567 # For a flat manifest revlog only a single step should be necessary
565 568 # as all relevant changelog entries are relevant to the flat
566 569 # manifest.
567 570 #
568 571 # For a filelog or tree manifest dirlog however not every changelog
569 572 # entry will have been relevant, so we need to skip some changelog
570 573 # nodes even after ellipsis-izing.
571 574 walk = [clrev]
572 575 while walk:
573 576 p = walk[0]
574 577 walk = walk[1:]
575 578 if p in clrevtolocalrev:
576 579 return clrevtolocalrev[p]
577 580 elif p in fullclnodes:
578 581 walk.extend([pp for pp in cl.parentrevs(p)
579 582 if pp != nullrev])
580 583 elif p in precomputedellipsis:
581 584 walk.extend([pp for pp in precomputedellipsis[p]
582 585 if pp != nullrev])
583 586 else:
584 587 # In this case, we've got an ellipsis with parents
585 588 # outside the current bundle (likely an
586 589 # incremental pull). We "know" that we can use the
587 590 # value of this same revlog at whatever revision
588 591 # is pointed to by linknode. "Know" is in scare
589 592 # quotes because I haven't done enough examination
590 593 # of edge cases to convince myself this is really
591 594 # a fact - it works for all the (admittedly
592 595 # thorough) cases in our testsuite, but I would be
593 596 # somewhat unsurprised to find a case in the wild
594 597 # where this breaks down a bit. That said, I don't
595 598 # know if it would hurt anything.
596 599 for i in pycompat.xrange(rev, 0, -1):
597 600 if store.linkrev(i) == clrev:
598 601 return i
599 602 # We failed to resolve a parent for this node, so
600 603 # we crash the changegroup construction.
601 604 raise error.Abort(
602 605 'unable to resolve parent while packing %r %r'
603 606 ' for changeset %r' % (store.indexfile, rev, clrev))
604 607
605 608 return nullrev
606 609
607 610 if not linkparents or (
608 611 store.parentrevs(rev) == (nullrev, nullrev)):
609 612 p1, p2 = nullrev, nullrev
610 613 elif len(linkparents) == 1:
611 614 p1, = sorted(local(p) for p in linkparents)
612 615 p2 = nullrev
613 616 else:
614 617 p1, p2 = sorted(local(p) for p in linkparents)
615 618
616 619 p1node, p2node = store.node(p1), store.node(p2)
617 620
618 621 return p1node, p2node, linknode
619 622
620 623 def deltagroup(repo, store, nodes, ischangelog, lookup, forcedeltaparentprev,
621 624 topic=None,
622 625 ellipses=False, clrevtolocalrev=None, fullclnodes=None,
623 626 precomputedellipsis=None):
624 627 """Calculate deltas for a set of revisions.
625 628
626 629 Is a generator of ``revisiondelta`` instances.
627 630
628 631 If topic is not None, progress detail will be generated using this
629 632 topic name (e.g. changesets, manifests, etc).
630 633 """
631 634 if not nodes:
632 635 return
633 636
634 637 cl = repo.changelog
635 638
636 639 if ischangelog:
637 640 # `hg log` shows changesets in storage order. To preserve order
638 641 # across clones, send out changesets in storage order.
639 642 nodesorder = 'storage'
640 643 elif ellipses:
641 644 nodes = _sortnodesellipsis(store, nodes, cl, lookup)
642 645 nodesorder = 'nodes'
643 646 else:
644 647 nodesorder = None
645 648
646 649 # Perform ellipses filtering and revision massaging. We do this before
647 650 # emitrevisions() because a) filtering out revisions creates less work
648 651 # for emitrevisions() b) dropping revisions would break emitrevisions()'s
649 652 # assumptions about delta choices and we would possibly send a delta
650 653 # referencing a missing base revision.
651 654 #
652 655 # Also, calling lookup() has side-effects with regards to populating
653 656 # data structures. If we don't call lookup() for each node or if we call
654 657 # lookup() after the first pass through each node, things can break -
655 658 # possibly intermittently depending on the python hash seed! For that
656 659 # reason, we store a mapping of all linknodes during the initial node
657 660 # pass rather than use lookup() on the output side.
658 661 if ellipses:
659 662 filtered = []
660 663 adjustedparents = {}
661 664 linknodes = {}
662 665
663 666 for node in nodes:
664 667 rev = store.rev(node)
665 668 linknode = lookup(node)
666 669 linkrev = cl.rev(linknode)
667 670 clrevtolocalrev[linkrev] = rev
668 671
669 672 # If linknode is in fullclnodes, it means the corresponding
670 673 # changeset was a full changeset and is being sent unaltered.
671 674 if linknode in fullclnodes:
672 675 linknodes[node] = linknode
673 676
674 677 # If the corresponding changeset wasn't in the set computed
675 678 # as relevant to us, it should be dropped outright.
676 679 elif linkrev not in precomputedellipsis:
677 680 continue
678 681
679 682 else:
680 683 # We could probably do this later and avoid the dict
681 684 # holding state. But it likely doesn't matter.
682 685 p1node, p2node, linknode = _resolvenarrowrevisioninfo(
683 686 cl, store, ischangelog, rev, linkrev, linknode,
684 687 clrevtolocalrev, fullclnodes, precomputedellipsis)
685 688
686 689 adjustedparents[node] = (p1node, p2node)
687 690 linknodes[node] = linknode
688 691
689 692 filtered.append(node)
690 693
691 694 nodes = filtered
692 695
693 696 # We expect the first pass to be fast, so we only engage the progress
694 697 # meter for constructing the revision deltas.
695 698 progress = None
696 699 if topic is not None:
697 700 progress = repo.ui.makeprogress(topic, unit=_('chunks'),
698 701 total=len(nodes))
699 702
700 703 configtarget = repo.ui.config('devel', 'bundle.delta')
701 704 if configtarget not in ('', 'p1', 'full'):
702 705 msg = _("""config "devel.bundle.delta" has unknown value: %s""")
703 706 repo.ui.warn(msg % configtarget)
704 707
705 708 deltamode = repository.CG_DELTAMODE_STD
706 709 if forcedeltaparentprev:
707 710 deltamode = repository.CG_DELTAMODE_PREV
708 711 elif configtarget == 'p1':
709 712 deltamode = repository.CG_DELTAMODE_P1
710 713 elif configtarget == 'full':
711 714 deltamode = repository.CG_DELTAMODE_FULL
712 715
713 716 revisions = store.emitrevisions(
714 717 nodes,
715 718 nodesorder=nodesorder,
716 719 revisiondata=True,
717 720 assumehaveparentrevisions=not ellipses,
718 721 deltamode=deltamode)
719 722
720 723 for i, revision in enumerate(revisions):
721 724 if progress:
722 725 progress.update(i + 1)
723 726
724 727 if ellipses:
725 728 linknode = linknodes[revision.node]
726 729
727 730 if revision.node in adjustedparents:
728 731 p1node, p2node = adjustedparents[revision.node]
729 732 revision.p1node = p1node
730 733 revision.p2node = p2node
731 734 revision.flags |= repository.REVISION_FLAG_ELLIPSIS
732 735
733 736 else:
734 737 linknode = lookup(revision.node)
735 738
736 739 revision.linknode = linknode
737 740 yield revision
738 741
739 742 if progress:
740 743 progress.complete()
741 744
742 745 class cgpacker(object):
743 746 def __init__(self, repo, oldmatcher, matcher, version,
744 747 builddeltaheader, manifestsend,
745 748 forcedeltaparentprev=False,
746 749 bundlecaps=None, ellipses=False,
747 750 shallow=False, ellipsisroots=None, fullnodes=None):
748 751 """Given a source repo, construct a bundler.
749 752
750 753 oldmatcher is a matcher that matches on files the client already has.
751 754 These will not be included in the changegroup.
752 755
753 756 matcher is a matcher that matches on files to include in the
754 757 changegroup. Used to facilitate sparse changegroups.
755 758
756 759 forcedeltaparentprev indicates whether delta parents must be against
757 760 the previous revision in a delta group. This should only be used for
758 761 compatibility with changegroup version 1.
759 762
760 763 builddeltaheader is a callable that constructs the header for a group
761 764 delta.
762 765
763 766 manifestsend is a chunk to send after manifests have been fully emitted.
764 767
765 768 ellipses indicates whether ellipsis serving mode is enabled.
766 769
767 770 bundlecaps is optional and can be used to specify the set of
768 771 capabilities which can be used to build the bundle. While bundlecaps is
769 772 unused in core Mercurial, extensions rely on this feature to communicate
770 773 capabilities to customize the changegroup packer.
771 774
772 775 shallow indicates whether shallow data might be sent. The packer may
773 776 need to pack file contents not introduced by the changes being packed.
774 777
775 778 fullnodes is the set of changelog nodes which should not be ellipsis
776 779 nodes. We store this rather than the set of nodes that should be
777 780 ellipsis because for very large histories we expect this to be
778 781 significantly smaller.
779 782 """
780 783 assert oldmatcher
781 784 assert matcher
782 785 self._oldmatcher = oldmatcher
783 786 self._matcher = matcher
784 787
785 788 self.version = version
786 789 self._forcedeltaparentprev = forcedeltaparentprev
787 790 self._builddeltaheader = builddeltaheader
788 791 self._manifestsend = manifestsend
789 792 self._ellipses = ellipses
790 793
791 794 # Set of capabilities we can use to build the bundle.
792 795 if bundlecaps is None:
793 796 bundlecaps = set()
794 797 self._bundlecaps = bundlecaps
795 798 self._isshallow = shallow
796 799 self._fullclnodes = fullnodes
797 800
798 801 # Maps ellipsis revs to their roots at the changelog level.
799 802 self._precomputedellipsis = ellipsisroots
800 803
801 804 self._repo = repo
802 805
803 806 if self._repo.ui.verbose and not self._repo.ui.debugflag:
804 807 self._verbosenote = self._repo.ui.note
805 808 else:
806 809 self._verbosenote = lambda s: None
807 810
808 811 def generate(self, commonrevs, clnodes, fastpathlinkrev, source,
809 812 changelog=True):
810 813 """Yield a sequence of changegroup byte chunks.
811 814 If changelog is False, changelog data won't be added to changegroup
812 815 """
813 816
814 817 repo = self._repo
815 818 cl = repo.changelog
816 819
817 820 self._verbosenote(_('uncompressed size of bundle content:\n'))
818 821 size = 0
819 822
820 823 clstate, deltas = self._generatechangelog(cl, clnodes,
821 824 generate=changelog)
822 825 for delta in deltas:
823 826 for chunk in _revisiondeltatochunks(delta,
824 827 self._builddeltaheader):
825 828 size += len(chunk)
826 829 yield chunk
827 830
828 831 close = closechunk()
829 832 size += len(close)
830 833 yield closechunk()
831 834
832 835 self._verbosenote(_('%8.i (changelog)\n') % size)
833 836
834 837 clrevorder = clstate['clrevorder']
835 838 manifests = clstate['manifests']
836 839 changedfiles = clstate['changedfiles']
837 840
838 841 # We need to make sure that the linkrev in the changegroup refers to
839 842 # the first changeset that introduced the manifest or file revision.
840 843 # The fastpath is usually safer than the slowpath, because the filelogs
841 844 # are walked in revlog order.
842 845 #
843 846 # When taking the slowpath when the manifest revlog uses generaldelta,
844 847 # the manifest may be walked in the "wrong" order. Without 'clrevorder',
845 848 # we would get an incorrect linkrev (see fix in cc0ff93d0c0c).
846 849 #
847 850 # When taking the fastpath, we are only vulnerable to reordering
848 851 # of the changelog itself. The changelog never uses generaldelta and is
849 852 # never reordered. To handle this case, we simply take the slowpath,
850 853 # which already has the 'clrevorder' logic. This was also fixed in
851 854 # cc0ff93d0c0c.
852 855
853 856 # Treemanifests don't work correctly with fastpathlinkrev
854 857 # either, because we don't discover which directory nodes to
855 858 # send along with files. This could probably be fixed.
856 859 fastpathlinkrev = fastpathlinkrev and (
857 860 'treemanifest' not in repo.requirements)
858 861
859 862 fnodes = {} # needed file nodes
860 863
861 864 size = 0
862 865 it = self.generatemanifests(
863 866 commonrevs, clrevorder, fastpathlinkrev, manifests, fnodes, source,
864 867 clstate['clrevtomanifestrev'])
865 868
866 869 for tree, deltas in it:
867 870 if tree:
868 871 assert self.version == b'03'
869 872 chunk = _fileheader(tree)
870 873 size += len(chunk)
871 874 yield chunk
872 875
873 876 for delta in deltas:
874 877 chunks = _revisiondeltatochunks(delta, self._builddeltaheader)
875 878 for chunk in chunks:
876 879 size += len(chunk)
877 880 yield chunk
878 881
879 882 close = closechunk()
880 883 size += len(close)
881 884 yield close
882 885
883 886 self._verbosenote(_('%8.i (manifests)\n') % size)
884 887 yield self._manifestsend
885 888
886 889 mfdicts = None
887 890 if self._ellipses and self._isshallow:
888 891 mfdicts = [(self._repo.manifestlog[n].read(), lr)
889 892 for (n, lr) in manifests.iteritems()]
890 893
891 894 manifests.clear()
892 895 clrevs = set(cl.rev(x) for x in clnodes)
893 896
894 897 it = self.generatefiles(changedfiles, commonrevs,
895 898 source, mfdicts, fastpathlinkrev,
896 899 fnodes, clrevs)
897 900
898 901 for path, deltas in it:
899 902 h = _fileheader(path)
900 903 size = len(h)
901 904 yield h
902 905
903 906 for delta in deltas:
904 907 chunks = _revisiondeltatochunks(delta, self._builddeltaheader)
905 908 for chunk in chunks:
906 909 size += len(chunk)
907 910 yield chunk
908 911
909 912 close = closechunk()
910 913 size += len(close)
911 914 yield close
912 915
913 916 self._verbosenote(_('%8.i %s\n') % (size, path))
914 917
915 918 yield closechunk()
916 919
917 920 if clnodes:
918 921 repo.hook('outgoing', node=hex(clnodes[0]), source=source)
919 922
920 923 def _generatechangelog(self, cl, nodes, generate=True):
921 924 """Generate data for changelog chunks.
922 925
923 926 Returns a 2-tuple of a dict containing state and an iterable of
924 927 byte chunks. The state will not be fully populated until the
925 928 chunk stream has been fully consumed.
926 929
927 930 if generate is False, the state will be fully populated and no chunk
928 931 stream will be yielded
929 932 """
930 933 clrevorder = {}
931 934 manifests = {}
932 935 mfl = self._repo.manifestlog
933 936 changedfiles = set()
934 937 clrevtomanifestrev = {}
935 938
936 939 state = {
937 940 'clrevorder': clrevorder,
938 941 'manifests': manifests,
939 942 'changedfiles': changedfiles,
940 943 'clrevtomanifestrev': clrevtomanifestrev,
941 944 }
942 945
943 946 if not (generate or self._ellipses):
944 947 # sort the nodes in storage order
945 948 nodes = sorted(nodes, key=cl.rev)
946 949 for node in nodes:
947 950 c = cl.changelogrevision(node)
948 951 clrevorder[node] = len(clrevorder)
949 952 # record the first changeset introducing this manifest version
950 953 manifests.setdefault(c.manifest, node)
951 954 # Record a complete list of potentially-changed files in
952 955 # this manifest.
953 956 changedfiles.update(c.files)
954 957
955 958 return state, ()
956 959
957 960 # Callback for the changelog, used to collect changed files and
958 961 # manifest nodes.
959 962 # Returns the linkrev node (identity in the changelog case).
960 963 def lookupcl(x):
961 964 c = cl.changelogrevision(x)
962 965 clrevorder[x] = len(clrevorder)
963 966
964 967 if self._ellipses:
965 968 # Only update manifests if x is going to be sent. Otherwise we
966 969 # end up with bogus linkrevs specified for manifests and
967 970 # we skip some manifest nodes that we should otherwise
968 971 # have sent.
969 972 if (x in self._fullclnodes
970 973 or cl.rev(x) in self._precomputedellipsis):
971 974
972 975 manifestnode = c.manifest
973 976 # Record the first changeset introducing this manifest
974 977 # version.
975 978 manifests.setdefault(manifestnode, x)
976 979 # Set this narrow-specific dict so we have the lowest
977 980 # manifest revnum to look up for this cl revnum. (Part of
978 981 # mapping changelog ellipsis parents to manifest ellipsis
979 982 # parents)
980 983 clrevtomanifestrev.setdefault(
981 984 cl.rev(x), mfl.rev(manifestnode))
982 985 # We can't trust the changed files list in the changeset if the
983 986 # client requested a shallow clone.
984 987 if self._isshallow:
985 988 changedfiles.update(mfl[c.manifest].read().keys())
986 989 else:
987 990 changedfiles.update(c.files)
988 991 else:
989 992 # record the first changeset introducing this manifest version
990 993 manifests.setdefault(c.manifest, x)
991 994 # Record a complete list of potentially-changed files in
992 995 # this manifest.
993 996 changedfiles.update(c.files)
994 997
995 998 return x
996 999
997 1000 gen = deltagroup(
998 1001 self._repo, cl, nodes, True, lookupcl,
999 1002 self._forcedeltaparentprev,
1000 1003 ellipses=self._ellipses,
1001 1004 topic=_('changesets'),
1002 1005 clrevtolocalrev={},
1003 1006 fullclnodes=self._fullclnodes,
1004 1007 precomputedellipsis=self._precomputedellipsis)
1005 1008
1006 1009 return state, gen
1007 1010
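A hedged sketch of the expected consumption pattern (hypothetical caller, not part of this change): the returned state dict is only trustworthy once the chunk iterator has been drained.

    # 'packer' is a cgpacker instance, 'cl' its repo's changelog (assumed names)
    state, chunks = packer._generatechangelog(cl, nodes)
    for chunk in chunks:
        write(chunk)                    # hypothetical sink for the byte chunks
    manifests = state['manifests']      # fully populated only after draining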
1008 1011 def generatemanifests(self, commonrevs, clrevorder, fastpathlinkrev,
1009 1012 manifests, fnodes, source, clrevtolocalrev):
1010 1013 """Returns an iterator of changegroup chunks containing manifests.
1011 1014
1012 1015 `source` is unused here, but is used by extensions like remotefilelog to
1013 1016 change what is sent based on pulls vs pushes, etc.
1014 1017 """
1015 1018 repo = self._repo
1016 1019 mfl = repo.manifestlog
1017 1020 tmfnodes = {'': manifests}
1018 1021
1019 1022 # Callback for the manifest, used to collect linkrevs for filelog
1020 1023 # revisions.
1021 1024 # Returns the linkrev node (collected in lookupcl).
1022 1025 def makelookupmflinknode(tree, nodes):
1023 1026 if fastpathlinkrev:
1024 1027 assert not tree
1025 1028 return manifests.__getitem__
1026 1029
1027 1030 def lookupmflinknode(x):
1028 1031 """Callback for looking up the linknode for manifests.
1029 1032
1030 1033 Returns the linkrev node for the specified manifest.
1031 1034
1032 1035 SIDE EFFECT:
1033 1036
1034 1037 1) fclnodes gets populated with the list of relevant
1035 1038 file nodes if we're not using fastpathlinkrev
1036 1039 2) When treemanifests are in use, collects treemanifest nodes
1037 1040 to send
1038 1041
1039 1042 Note that this means manifests must be completely sent to
1040 1043 the client before you can trust the list of files and
1041 1044 treemanifests to send.
1042 1045 """
1043 1046 clnode = nodes[x]
1044 1047 mdata = mfl.get(tree, x).readfast(shallow=True)
1045 1048 for p, n, fl in mdata.iterentries():
1046 1049 if fl == 't': # subdirectory manifest
1047 1050 subtree = tree + p + '/'
1048 1051 tmfclnodes = tmfnodes.setdefault(subtree, {})
1049 1052 tmfclnode = tmfclnodes.setdefault(n, clnode)
1050 1053 if clrevorder[clnode] < clrevorder[tmfclnode]:
1051 1054 tmfclnodes[n] = clnode
1052 1055 else:
1053 1056 f = tree + p
1054 1057 fclnodes = fnodes.setdefault(f, {})
1055 1058 fclnode = fclnodes.setdefault(n, clnode)
1056 1059 if clrevorder[clnode] < clrevorder[fclnode]:
1057 1060 fclnodes[n] = clnode
1058 1061 return clnode
1059 1062 return lookupmflinknode
1060 1063
1061 1064 while tmfnodes:
1062 1065 tree, nodes = tmfnodes.popitem()
1063 1066
1064 1067 should_visit = self._matcher.visitdir(tree[:-1])
1065 1068 if tree and not should_visit:
1066 1069 continue
1067 1070
1068 1071 store = mfl.getstorage(tree)
1069 1072
1070 1073 if not should_visit:
1071 1074 # No nodes to send because this directory is out of
1072 1075 # the client's view of the repository (probably
1073 1076 # because of narrow clones). Do this even for the root
1074 1077 # directory (tree=='')
1075 1078 prunednodes = []
1076 1079 else:
1077 1080 # Avoid sending any manifest nodes we can prove the
1078 1081 # client already has by checking linkrevs. See the
1079 1082 # related comment in generatefiles().
1080 1083 prunednodes = self._prunemanifests(store, nodes, commonrevs)
1081 1084
1082 1085 if tree and not prunednodes:
1083 1086 continue
1084 1087
1085 1088 lookupfn = makelookupmflinknode(tree, nodes)
1086 1089
1087 1090 deltas = deltagroup(
1088 1091 self._repo, store, prunednodes, False, lookupfn,
1089 1092 self._forcedeltaparentprev,
1090 1093 ellipses=self._ellipses,
1091 1094 topic=_('manifests'),
1092 1095 clrevtolocalrev=clrevtolocalrev,
1093 1096 fullclnodes=self._fullclnodes,
1094 1097 precomputedellipsis=self._precomputedellipsis)
1095 1098
1096 1099 if not self._oldmatcher.visitdir(store.tree[:-1]):
1097 1100 yield tree, deltas
1098 1101 else:
1099 1102 # 'deltas' is a generator and we need to consume it even if
1100 1103 # we are not going to send it because a side-effect is that
1101 1104 # it updates tmfnodes (via lookupfn)
1102 1105 for d in deltas:
1103 1106 pass
1104 1107 if not tree:
1105 1108 yield tree, []
1106 1109
1107 1110 def _prunemanifests(self, store, nodes, commonrevs):
1108 1111 if not self._ellipses:
1109 1112 # In the non-ellipses case and on large repositories, it is better to
1110 1113 # avoid calling store.rev and store.linkrev on a lot of nodes,
1111 1114 # even if that means sending some extra data
1112 1115 return nodes.copy()
1113 1116 # This is split out as a separate method to allow filtering
1114 1117 # commonrevs in extension code.
1115 1118 #
1116 1119 # TODO(augie): this shouldn't be required, instead we should
1117 1120 # make filtering of revisions to send delegated to the store
1118 1121 # layer.
1119 1122 frev, flr = store.rev, store.linkrev
1120 1123 return [n for n in nodes if flr(frev(n)) not in commonrevs]
1121 1124
1122 1125 # The 'source' parameter is useful for extensions
1123 1126 def generatefiles(self, changedfiles, commonrevs, source,
1124 1127 mfdicts, fastpathlinkrev, fnodes, clrevs):
1125 1128 changedfiles = [f for f in changedfiles
1126 1129 if self._matcher(f) and not self._oldmatcher(f)]
1127 1130
1128 1131 if not fastpathlinkrev:
1129 1132 def normallinknodes(unused, fname):
1130 1133 return fnodes.get(fname, {})
1131 1134 else:
1132 1135 cln = self._repo.changelog.node
1133 1136
1134 1137 def normallinknodes(store, fname):
1135 1138 flinkrev = store.linkrev
1136 1139 fnode = store.node
1137 1140 revs = ((r, flinkrev(r)) for r in store)
1138 1141 return dict((fnode(r), cln(lr))
1139 1142 for r, lr in revs if lr in clrevs)
1140 1143
1141 1144 clrevtolocalrev = {}
1142 1145
1143 1146 if self._isshallow:
1144 1147 # In a shallow clone, the linknodes callback needs to also include
1145 1148 # those file nodes that are in the manifests we sent but weren't
1146 1149 # introduced by those manifests.
1147 1150 commonctxs = [self._repo[c] for c in commonrevs]
1148 1151 clrev = self._repo.changelog.rev
1149 1152
1150 1153 def linknodes(flog, fname):
1151 1154 for c in commonctxs:
1152 1155 try:
1153 1156 fnode = c.filenode(fname)
1154 1157 clrevtolocalrev[c.rev()] = flog.rev(fnode)
1155 1158 except error.ManifestLookupError:
1156 1159 pass
1157 1160 links = normallinknodes(flog, fname)
1158 1161 if len(links) != len(mfdicts):
1159 1162 for mf, lr in mfdicts:
1160 1163 fnode = mf.get(fname, None)
1161 1164 if fnode in links:
1162 1165 links[fnode] = min(links[fnode], lr, key=clrev)
1163 1166 elif fnode:
1164 1167 links[fnode] = lr
1165 1168 return links
1166 1169 else:
1167 1170 linknodes = normallinknodes
1168 1171
1169 1172 repo = self._repo
1170 1173 progress = repo.ui.makeprogress(_('files'), unit=_('files'),
1171 1174 total=len(changedfiles))
1172 1175 for i, fname in enumerate(sorted(changedfiles)):
1173 1176 filerevlog = repo.file(fname)
1174 1177 if not filerevlog:
1175 1178 raise error.Abort(_("empty or missing file data for %s") %
1176 1179 fname)
1177 1180
1178 1181 clrevtolocalrev.clear()
1179 1182
1180 1183 linkrevnodes = linknodes(filerevlog, fname)
1181 1184 # Lookup for filenodes, we collected the linkrev nodes above in the
1182 1185 # fastpath case and with lookupmf in the slowpath case.
1183 1186 def lookupfilelog(x):
1184 1187 return linkrevnodes[x]
1185 1188
1186 1189 frev, flr = filerevlog.rev, filerevlog.linkrev
1187 1190 # Skip sending any filenode we know the client already
1188 1191 # has. This avoids over-sending files relatively
1189 1192 # inexpensively, so it's not a problem if we under-filter
1190 1193 # here.
1191 1194 filenodes = [n for n in linkrevnodes
1192 1195 if flr(frev(n)) not in commonrevs]
1193 1196
1194 1197 if not filenodes:
1195 1198 continue
1196 1199
1197 1200 progress.update(i + 1, item=fname)
1198 1201
1199 1202 deltas = deltagroup(
1200 1203 self._repo, filerevlog, filenodes, False, lookupfilelog,
1201 1204 self._forcedeltaparentprev,
1202 1205 ellipses=self._ellipses,
1203 1206 clrevtolocalrev=clrevtolocalrev,
1204 1207 fullclnodes=self._fullclnodes,
1205 1208 precomputedellipsis=self._precomputedellipsis)
1206 1209
1207 1210 yield fname, deltas
1208 1211
1209 1212 progress.complete()
1210 1213
1211 1214 def _makecg1packer(repo, oldmatcher, matcher, bundlecaps,
1212 1215 ellipses=False, shallow=False, ellipsisroots=None,
1213 1216 fullnodes=None):
1214 1217 builddeltaheader = lambda d: _CHANGEGROUPV1_DELTA_HEADER.pack(
1215 1218 d.node, d.p1node, d.p2node, d.linknode)
1216 1219
1217 1220 return cgpacker(repo, oldmatcher, matcher, b'01',
1218 1221 builddeltaheader=builddeltaheader,
1219 1222 manifestsend=b'',
1220 1223 forcedeltaparentprev=True,
1221 1224 bundlecaps=bundlecaps,
1222 1225 ellipses=ellipses,
1223 1226 shallow=shallow,
1224 1227 ellipsisroots=ellipsisroots,
1225 1228 fullnodes=fullnodes)
1226 1229
1227 1230 def _makecg2packer(repo, oldmatcher, matcher, bundlecaps,
1228 1231 ellipses=False, shallow=False, ellipsisroots=None,
1229 1232 fullnodes=None):
1230 1233 builddeltaheader = lambda d: _CHANGEGROUPV2_DELTA_HEADER.pack(
1231 1234 d.node, d.p1node, d.p2node, d.basenode, d.linknode)
1232 1235
1233 1236 return cgpacker(repo, oldmatcher, matcher, b'02',
1234 1237 builddeltaheader=builddeltaheader,
1235 1238 manifestsend=b'',
1236 1239 bundlecaps=bundlecaps,
1237 1240 ellipses=ellipses,
1238 1241 shallow=shallow,
1239 1242 ellipsisroots=ellipsisroots,
1240 1243 fullnodes=fullnodes)
1241 1244
1242 1245 def _makecg3packer(repo, oldmatcher, matcher, bundlecaps,
1243 1246 ellipses=False, shallow=False, ellipsisroots=None,
1244 1247 fullnodes=None):
1245 1248 builddeltaheader = lambda d: _CHANGEGROUPV3_DELTA_HEADER.pack(
1246 1249 d.node, d.p1node, d.p2node, d.basenode, d.linknode, d.flags)
1247 1250
1248 1251 return cgpacker(repo, oldmatcher, matcher, b'03',
1249 1252 builddeltaheader=builddeltaheader,
1250 1253 manifestsend=closechunk(),
1251 1254 bundlecaps=bundlecaps,
1252 1255 ellipses=ellipses,
1253 1256 shallow=shallow,
1254 1257 ellipsisroots=ellipsisroots,
1255 1258 fullnodes=fullnodes)
1256 1259
1257 1260 _packermap = {'01': (_makecg1packer, cg1unpacker),
1258 1261 # cg2 adds support for exchanging generaldelta
1259 1262 '02': (_makecg2packer, cg2unpacker),
1260 1263 # cg3 adds support for exchanging revlog flags and treemanifests
1261 1264 '03': (_makecg3packer, cg3unpacker),
1262 1265 }
1263 1266
1264 1267 def allsupportedversions(repo):
1265 1268 versions = set(_packermap.keys())
1266 1269 if not (repo.ui.configbool('experimental', 'changegroup3') or
1267 1270 repo.ui.configbool('experimental', 'treemanifest') or
1268 1271 'treemanifest' in repo.requirements):
1269 1272 versions.discard('03')
1270 1273 return versions
1271 1274
1272 1275 # Changegroup versions that can be applied to the repo
1273 1276 def supportedincomingversions(repo):
1274 1277 return allsupportedversions(repo)
1275 1278
1276 1279 # Changegroup versions that can be created from the repo
1277 1280 def supportedoutgoingversions(repo):
1278 1281 versions = allsupportedversions(repo)
1279 1282 if 'treemanifest' in repo.requirements:
1280 1283 # Versions 01 and 02 support only flat manifests and it's just too
1281 1284 # expensive to convert between the flat manifest and tree manifest on
1282 1285 # the fly. Since tree manifests are hashed differently, all of history
1283 1286 # would have to be converted. Instead, we simply don't even pretend to
1284 1287 # support versions 01 and 02.
1285 1288 versions.discard('01')
1286 1289 versions.discard('02')
1287 1290 if repository.NARROW_REQUIREMENT in repo.requirements:
1288 1291 # Versions 01 and 02 don't support revlog flags, and we need to
1289 1292 # support that for stripping and unbundling to work.
1290 1293 versions.discard('01')
1291 1294 versions.discard('02')
1292 1295 if LFS_REQUIREMENT in repo.requirements:
1293 1296 # Versions 01 and 02 don't support revlog flags, and we need to
1294 1297 # mark LFS entries with REVIDX_EXTSTORED.
1295 1298 versions.discard('01')
1296 1299 versions.discard('02')
1297 1300
1298 1301 return versions
1299 1302
1300 1303 def localversion(repo):
1301 1304 # Finds the best version to use for bundles that are meant to be used
1302 1305 # locally, such as those from strip and shelve, and temporary bundles.
1303 1306 return max(supportedoutgoingversions(repo))
1304 1307
1305 1308 def safeversion(repo):
1306 1309 # Finds the smallest version that it's safe to assume clients of the repo
1307 1310 # will support. For example, all hg versions that support generaldelta also
1308 1311 # support changegroup 02.
1309 1312 versions = supportedoutgoingversions(repo)
1310 1313 if 'generaldelta' in repo.requirements:
1311 1314 versions.discard('01')
1312 1315 assert versions
1313 1316 return min(versions)
1314 1317
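As an illustrative example (not from the source): a repository that has the 'generaldelta' requirement but no treemanifest, narrow, or LFS requirements, with default config, would yield:

    supportedoutgoingversions(repo)   # -> {'01', '02'}
    safeversion(repo)                 # -> '02' ('01' dropped because of generaldelta)
    localversion(repo)                # -> '02' (highest supported version)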
1315 1318 def getbundler(version, repo, bundlecaps=None, oldmatcher=None,
1316 1319 matcher=None, ellipses=False, shallow=False,
1317 1320 ellipsisroots=None, fullnodes=None):
1318 1321 assert version in supportedoutgoingversions(repo)
1319 1322
1320 1323 if matcher is None:
1321 1324 matcher = matchmod.always()
1322 1325 if oldmatcher is None:
1323 1326 oldmatcher = matchmod.never()
1324 1327
1325 1328 if version == '01' and not matcher.always():
1326 1329 raise error.ProgrammingError('version 01 changegroups do not support '
1327 1330 'sparse file matchers')
1328 1331
1329 1332 if ellipses and version in (b'01', b'02'):
1330 1333 raise error.Abort(
1331 1334 _('ellipsis nodes require at least cg3 on client and server, '
1332 1335 'but negotiated version %s') % version)
1333 1336
1334 1337 # Requested files could include files not in the local store. So
1335 1338 # filter those out.
1336 1339 matcher = repo.narrowmatch(matcher)
1337 1340
1338 1341 fn = _packermap[version][0]
1339 1342 return fn(repo, oldmatcher, matcher, bundlecaps, ellipses=ellipses,
1340 1343 shallow=shallow, ellipsisroots=ellipsisroots,
1341 1344 fullnodes=fullnodes)
1342 1345
1343 1346 def getunbundler(version, fh, alg, extras=None):
1344 1347 return _packermap[version][1](fh, alg, extras=extras)
1345 1348
1346 1349 def _changegroupinfo(repo, nodes, source):
1347 1350 if repo.ui.verbose or source == 'bundle':
1348 1351 repo.ui.status(_("%d changesets found\n") % len(nodes))
1349 1352 if repo.ui.debugflag:
1350 1353 repo.ui.debug("list of changesets:\n")
1351 1354 for node in nodes:
1352 1355 repo.ui.debug("%s\n" % hex(node))
1353 1356
1354 1357 def makechangegroup(repo, outgoing, version, source, fastpath=False,
1355 1358 bundlecaps=None):
1356 1359 cgstream = makestream(repo, outgoing, version, source,
1357 1360 fastpath=fastpath, bundlecaps=bundlecaps)
1358 1361 return getunbundler(version, util.chunkbuffer(cgstream), None,
1359 1362 {'clcount': len(outgoing.missing) })
1360 1363
1361 1364 def makestream(repo, outgoing, version, source, fastpath=False,
1362 1365 bundlecaps=None, matcher=None):
1363 1366 bundler = getbundler(version, repo, bundlecaps=bundlecaps,
1364 1367 matcher=matcher)
1365 1368
1366 1369 repo = repo.unfiltered()
1367 1370 commonrevs = outgoing.common
1368 1371 csets = outgoing.missing
1369 1372 heads = outgoing.missingheads
1370 1373 # We go through the fast path if we get told to, or if all (unfiltered)
1371 1374 # heads have been requested (since we then know that all linkrevs will
1372 1375 # be pulled by the client).
1373 1376 heads.sort()
1374 1377 fastpathlinkrev = fastpath or (
1375 1378 repo.filtername is None and heads == sorted(repo.heads()))
1376 1379
1377 1380 repo.hook('preoutgoing', throw=True, source=source)
1378 1381 _changegroupinfo(repo, csets, source)
1379 1382 return bundler.generate(commonrevs, csets, fastpathlinkrev, source)
1380 1383
1381 1384 def _addchangegroupfiles(repo, source, revmap, trp, expectedfiles, needfiles):
1382 1385 revisions = 0
1383 1386 files = 0
1384 1387 progress = repo.ui.makeprogress(_('files'), unit=_('files'),
1385 1388 total=expectedfiles)
1386 1389 for chunkdata in iter(source.filelogheader, {}):
1387 1390 files += 1
1388 1391 f = chunkdata["filename"]
1389 1392 repo.ui.debug("adding %s revisions\n" % f)
1390 1393 progress.increment()
1391 1394 fl = repo.file(f)
1392 1395 o = len(fl)
1393 1396 try:
1394 1397 deltas = source.deltaiter()
1395 1398 if not fl.addgroup(deltas, revmap, trp):
1396 1399 raise error.Abort(_("received file revlog group is empty"))
1397 1400 except error.CensoredBaseError as e:
1398 1401 raise error.Abort(_("received delta base is censored: %s") % e)
1399 1402 revisions += len(fl) - o
1400 1403 if f in needfiles:
1401 1404 needs = needfiles[f]
1402 1405 for new in pycompat.xrange(o, len(fl)):
1403 1406 n = fl.node(new)
1404 1407 if n in needs:
1405 1408 needs.remove(n)
1406 1409 else:
1407 1410 raise error.Abort(
1408 1411 _("received spurious file revlog entry"))
1409 1412 if not needs:
1410 1413 del needfiles[f]
1411 1414 progress.complete()
1412 1415
1413 1416 for f, needs in needfiles.iteritems():
1414 1417 fl = repo.file(f)
1415 1418 for n in needs:
1416 1419 try:
1417 1420 fl.rev(n)
1418 1421 except error.LookupError:
1419 1422 raise error.Abort(
1420 1423 _('missing file data for %s:%s - run hg verify') %
1421 1424 (f, hex(n)))
1422 1425
1423 1426 return revisions, files
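Tying the helpers above together, a minimal sketch of how a caller might build a changegroup bundle from a set of common nodes and desired heads; it assumes mercurial.discovery for the outgoing computation and is not part of this change.

    from mercurial import discovery

    def makesafebundle(repo, common, heads, source='bundle'):
        outgoing = discovery.outgoing(repo, common, heads)
        version = safeversion(repo)   # smallest version every client should accept
        return makechangegroup(repo, outgoing, version, source)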
@@ -1,2701 +1,2703
1 1 # exchange.py - utility to exchange data between repos.
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import collections
11 11 import hashlib
12 12
13 13 from .i18n import _
14 14 from .node import (
15 15 bin,
16 16 hex,
17 17 nullid,
18 18 nullrev,
19 19 )
20 20 from .thirdparty import (
21 21 attr,
22 22 )
23 23 from . import (
24 24 bookmarks as bookmod,
25 25 bundle2,
26 26 changegroup,
27 27 discovery,
28 28 error,
29 29 exchangev2,
30 30 lock as lockmod,
31 31 logexchange,
32 32 narrowspec,
33 33 obsolete,
34 34 phases,
35 35 pushkey,
36 36 pycompat,
37 repository,
38 37 scmutil,
39 38 sslutil,
40 39 streamclone,
41 40 url as urlmod,
42 41 util,
43 42 wireprototypes,
44 43 )
44 from .interfaces import (
45 repository,
46 )
45 47 from .utils import (
46 48 stringutil,
47 49 )
48 50
49 51 urlerr = util.urlerr
50 52 urlreq = util.urlreq
51 53
52 54 _NARROWACL_SECTION = 'narrowacl'
53 55
54 56 # Maps bundle version human names to changegroup versions.
55 57 _bundlespeccgversions = {'v1': '01',
56 58 'v2': '02',
57 59 'packed1': 's1',
58 60 'bundle2': '02', #legacy
59 61 }
60 62
61 63 # Maps bundle version with content opts to choose which part to bundle
62 64 _bundlespeccontentopts = {
63 65 'v1': {
64 66 'changegroup': True,
65 67 'cg.version': '01',
66 68 'obsolescence': False,
67 69 'phases': False,
68 70 'tagsfnodescache': False,
69 71 'revbranchcache': False
70 72 },
71 73 'v2': {
72 74 'changegroup': True,
73 75 'cg.version': '02',
74 76 'obsolescence': False,
75 77 'phases': False,
76 78 'tagsfnodescache': True,
77 79 'revbranchcache': True
78 80 },
79 81 'packed1' : {
80 82 'cg.version': 's1'
81 83 }
82 84 }
83 85 _bundlespeccontentopts['bundle2'] = _bundlespeccontentopts['v2']
84 86
85 87 _bundlespecvariants = {"streamv2": {"changegroup": False, "streamv2": True,
86 88 "tagsfnodescache": False,
87 89 "revbranchcache": False}}
88 90
89 91 # Compression engines allowed in version 1. THIS SHOULD NEVER CHANGE.
90 92 _bundlespecv1compengines = {'gzip', 'bzip2', 'none'}
91 93
92 94 @attr.s
93 95 class bundlespec(object):
94 96 compression = attr.ib()
95 97 wirecompression = attr.ib()
96 98 version = attr.ib()
97 99 wireversion = attr.ib()
98 100 params = attr.ib()
99 101 contentopts = attr.ib()
100 102
101 103 def parsebundlespec(repo, spec, strict=True):
102 104 """Parse a bundle string specification into parts.
103 105
104 106 Bundle specifications denote a well-defined bundle/exchange format.
105 107 The content of a given specification should not change over time in
106 108 order to ensure that bundles produced by a newer version of Mercurial are
107 109 readable from an older version.
108 110
109 111 The string currently has the form:
110 112
111 113 <compression>-<type>[;<parameter0>[;<parameter1>]]
112 114
113 115 Where <compression> is one of the supported compression formats
114 116 and <type> is (currently) a version string. A ";" can follow the type and
115 117 all text afterwards is interpreted as URI encoded, ";" delimited key=value
116 118 pairs.
117 119
118 120 If ``strict`` is True (the default) <compression> is required. Otherwise,
119 121 it is optional.
120 122
121 123 Returns a bundlespec object of (compression, version, parameters).
122 124 Compression will be ``None`` if not in strict mode and a compression isn't
123 125 defined.
124 126
125 127 An ``InvalidBundleSpecification`` is raised when the specification is
126 128 not syntactically well formed.
127 129
128 130 An ``UnsupportedBundleSpecification`` is raised when the compression or
129 131 bundle type/version is not recognized.
130 132
131 133 Note: this function will likely eventually return a more complex data
132 134 structure, including bundle2 part information.
133 135 """
134 136 def parseparams(s):
135 137 if ';' not in s:
136 138 return s, {}
137 139
138 140 params = {}
139 141 version, paramstr = s.split(';', 1)
140 142
141 143 for p in paramstr.split(';'):
142 144 if '=' not in p:
143 145 raise error.InvalidBundleSpecification(
144 146 _('invalid bundle specification: '
145 147 'missing "=" in parameter: %s') % p)
146 148
147 149 key, value = p.split('=', 1)
148 150 key = urlreq.unquote(key)
149 151 value = urlreq.unquote(value)
150 152 params[key] = value
151 153
152 154 return version, params
153 155
154 156
155 157 if strict and '-' not in spec:
156 158 raise error.InvalidBundleSpecification(
157 159 _('invalid bundle specification; '
158 160 'must be prefixed with compression: %s') % spec)
159 161
160 162 if '-' in spec:
161 163 compression, version = spec.split('-', 1)
162 164
163 165 if compression not in util.compengines.supportedbundlenames:
164 166 raise error.UnsupportedBundleSpecification(
165 167 _('%s compression is not supported') % compression)
166 168
167 169 version, params = parseparams(version)
168 170
169 171 if version not in _bundlespeccgversions:
170 172 raise error.UnsupportedBundleSpecification(
171 173 _('%s is not a recognized bundle version') % version)
172 174 else:
173 175 # Value could be just the compression or just the version, in which
174 176 # case some defaults are assumed (but only when not in strict mode).
175 177 assert not strict
176 178
177 179 spec, params = parseparams(spec)
178 180
179 181 if spec in util.compengines.supportedbundlenames:
180 182 compression = spec
181 183 version = 'v1'
182 184 # Generaldelta repos require v2.
183 185 if 'generaldelta' in repo.requirements:
184 186 version = 'v2'
185 187 # Modern compression engines require v2.
186 188 if compression not in _bundlespecv1compengines:
187 189 version = 'v2'
188 190 elif spec in _bundlespeccgversions:
189 191 if spec == 'packed1':
190 192 compression = 'none'
191 193 else:
192 194 compression = 'bzip2'
193 195 version = spec
194 196 else:
195 197 raise error.UnsupportedBundleSpecification(
196 198 _('%s is not a recognized bundle specification') % spec)
197 199
198 200 # Bundle version 1 only supports a known set of compression engines.
199 201 if version == 'v1' and compression not in _bundlespecv1compengines:
200 202 raise error.UnsupportedBundleSpecification(
201 203 _('compression engine %s is not supported on v1 bundles') %
202 204 compression)
203 205
204 206 # The specification for packed1 can optionally declare the data formats
205 207 # required to apply it. If we see this metadata, compare against what the
206 208 # repo supports and error if the bundle isn't compatible.
207 209 if version == 'packed1' and 'requirements' in params:
208 210 requirements = set(params['requirements'].split(','))
209 211 missingreqs = requirements - repo.supportedformats
210 212 if missingreqs:
211 213 raise error.UnsupportedBundleSpecification(
212 214 _('missing support for repository features: %s') %
213 215 ', '.join(sorted(missingreqs)))
214 216
215 217 # Compute contentopts based on the version
216 218 contentopts = _bundlespeccontentopts.get(version, {}).copy()
217 219
218 220 # Process the variants
219 221 if "stream" in params and params["stream"] == "v2":
220 222 variant = _bundlespecvariants["streamv2"]
221 223 contentopts.update(variant)
222 224
223 225 engine = util.compengines.forbundlename(compression)
224 226 compression, wirecompression = engine.bundletype()
225 227 wireversion = _bundlespeccgversions[version]
226 228
227 229 return bundlespec(compression, wirecompression, version, wireversion,
228 230 params, contentopts)
229 231
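For example (illustrative values, assuming the gzip engine is available):

    spec = parsebundlespec(repo, 'gzip-v2;obsolescence=true')
    # spec.compression -> 'gzip'
    # spec.version     -> 'v2'
    # spec.wireversion -> '02'
    # spec.params      -> {'obsolescence': 'true'}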
230 232 def readbundle(ui, fh, fname, vfs=None):
231 233 header = changegroup.readexactly(fh, 4)
232 234
233 235 alg = None
234 236 if not fname:
235 237 fname = "stream"
236 238 if not header.startswith('HG') and header.startswith('\0'):
237 239 fh = changegroup.headerlessfixup(fh, header)
238 240 header = "HG10"
239 241 alg = 'UN'
240 242 elif vfs:
241 243 fname = vfs.join(fname)
242 244
243 245 magic, version = header[0:2], header[2:4]
244 246
245 247 if magic != 'HG':
246 248 raise error.Abort(_('%s: not a Mercurial bundle') % fname)
247 249 if version == '10':
248 250 if alg is None:
249 251 alg = changegroup.readexactly(fh, 2)
250 252 return changegroup.cg1unpacker(fh, alg)
251 253 elif version.startswith('2'):
252 254 return bundle2.getunbundler(ui, fh, magicstring=magic + version)
253 255 elif version == 'S1':
254 256 return streamclone.streamcloneapplier(fh)
255 257 else:
256 258 raise error.Abort(_('%s: unknown bundle version %s') % (fname, version))
257 259
258 260 def getbundlespec(ui, fh):
259 261 """Infer the bundlespec from a bundle file handle.
260 262
261 263 The input file handle is seeked and the original seek position is not
262 264 restored.
263 265 """
264 266 def speccompression(alg):
265 267 try:
266 268 return util.compengines.forbundletype(alg).bundletype()[0]
267 269 except KeyError:
268 270 return None
269 271
270 272 b = readbundle(ui, fh, None)
271 273 if isinstance(b, changegroup.cg1unpacker):
272 274 alg = b._type
273 275 if alg == '_truncatedBZ':
274 276 alg = 'BZ'
275 277 comp = speccompression(alg)
276 278 if not comp:
277 279 raise error.Abort(_('unknown compression algorithm: %s') % alg)
278 280 return '%s-v1' % comp
279 281 elif isinstance(b, bundle2.unbundle20):
280 282 if 'Compression' in b.params:
281 283 comp = speccompression(b.params['Compression'])
282 284 if not comp:
283 285 raise error.Abort(_('unknown compression algorithm: %s') % comp)
284 286 else:
285 287 comp = 'none'
286 288
287 289 version = None
288 290 for part in b.iterparts():
289 291 if part.type == 'changegroup':
290 292 version = part.params['version']
291 293 if version in ('01', '02'):
292 294 version = 'v2'
293 295 else:
294 296 raise error.Abort(_('changegroup version %s does not have '
295 297 'a known bundlespec') % version,
296 298 hint=_('try upgrading your Mercurial '
297 299 'client'))
298 300 elif part.type == 'stream2' and version is None:
299 301 # A stream2 part must be part of a v2 bundle
300 302 requirements = urlreq.unquote(part.params['requirements'])
301 303 splitted = requirements.split()
302 304 params = bundle2._formatrequirementsparams(splitted)
303 305 return 'none-v2;stream=v2;%s' % params
304 306
305 307 if not version:
306 308 raise error.Abort(_('could not identify changegroup version in '
307 309 'bundle'))
308 310
309 311 return '%s-%s' % (comp, version)
310 312 elif isinstance(b, streamclone.streamcloneapplier):
311 313 requirements = streamclone.readbundle1header(fh)[2]
312 314 formatted = bundle2._formatrequirementsparams(requirements)
313 315 return 'none-packed1;%s' % formatted
314 316 else:
315 317 raise error.Abort(_('unknown bundle type: %s') % b)
316 318
317 319 def _computeoutgoing(repo, heads, common):
318 320 """Computes which revs are outgoing given a set of common
319 321 and a set of heads.
320 322
321 323 This is a separate function so extensions can have access to
322 324 the logic.
323 325
324 326 Returns a discovery.outgoing object.
325 327 """
326 328 cl = repo.changelog
327 329 if common:
328 330 hasnode = cl.hasnode
329 331 common = [n for n in common if hasnode(n)]
330 332 else:
331 333 common = [nullid]
332 334 if not heads:
333 335 heads = cl.heads()
334 336 return discovery.outgoing(repo, common, heads)
335 337
336 338 def _checkpublish(pushop):
337 339 repo = pushop.repo
338 340 ui = repo.ui
339 341 behavior = ui.config('experimental', 'auto-publish')
340 342 if pushop.publish or behavior not in ('warn', 'confirm', 'abort'):
341 343 return
342 344 remotephases = listkeys(pushop.remote, 'phases')
343 345 if not remotephases.get('publishing', False):
344 346 return
345 347
346 348 if pushop.revs is None:
347 349 published = repo.filtered('served').revs('not public()')
348 350 else:
349 351 published = repo.revs('::%ln - public()', pushop.revs)
350 352 if published:
351 353 if behavior == 'warn':
352 354 ui.warn(_('%i changesets about to be published\n')
353 355 % len(published))
354 356 elif behavior == 'confirm':
355 357 if ui.promptchoice(_('push and publish %i changesets (yn)?'
356 358 '$$ &Yes $$ &No') % len(published)):
357 359 raise error.Abort(_('user quit'))
358 360 elif behavior == 'abort':
359 361 msg = _('push would publish %i changesets') % len(published)
360 362 hint = _("use --publish or adjust 'experimental.auto-publish'"
361 363 " config")
362 364 raise error.Abort(msg, hint=hint)
363 365
364 366 def _forcebundle1(op):
365 367 """return true if a pull/push must use bundle1
366 368
367 369 This function is used to allow testing of the older bundle version"""
368 370 ui = op.repo.ui
369 371 # The goal of this config is to allow developers to choose the bundle
370 372 # version used during exchange. This is especially handy during tests.
371 373 # Value is a list of bundle versions to pick from; the highest version
372 374 # should be used.
373 375 #
374 376 # developer config: devel.legacy.exchange
375 377 exchange = ui.configlist('devel', 'legacy.exchange')
376 378 forcebundle1 = 'bundle2' not in exchange and 'bundle1' in exchange
377 379 return forcebundle1 or not op.remote.capable('bundle2')
378 380
379 381 class pushoperation(object):
380 382 """An object that represents a single push operation
381 383
382 384 Its purpose is to carry push related state and very common operations.
383 385
384 386 A new pushoperation should be created at the beginning of each push and
385 387 discarded afterward.
386 388 """
387 389
388 390 def __init__(self, repo, remote, force=False, revs=None, newbranch=False,
389 391 bookmarks=(), publish=False, pushvars=None):
390 392 # repo we push from
391 393 self.repo = repo
392 394 self.ui = repo.ui
393 395 # repo we push to
394 396 self.remote = remote
395 397 # force option provided
396 398 self.force = force
397 399 # revs to be pushed (None is "all")
398 400 self.revs = revs
399 401 # bookmark explicitly pushed
400 402 self.bookmarks = bookmarks
401 403 # allow push of new branch
402 404 self.newbranch = newbranch
403 405 # steps already performed
404 406 # (used to check what steps have already been performed through bundle2)
405 407 self.stepsdone = set()
406 408 # Integer version of the changegroup push result
407 409 # - None means nothing to push
408 410 # - 0 means HTTP error
409 411 # - 1 means we pushed and remote head count is unchanged *or*
410 412 # we have outgoing changesets but refused to push
411 413 # - other values as described by addchangegroup()
412 414 self.cgresult = None
413 415 # Boolean value for the bookmark push
414 416 self.bkresult = None
415 417 # discover.outgoing object (contains common and outgoing data)
416 418 self.outgoing = None
417 419 # all remote topological heads before the push
418 420 self.remoteheads = None
419 421 # Details of the remote branch pre and post push
420 422 #
421 423 # mapping: {'branch': ([remoteheads],
422 424 # [newheads],
423 425 # [unsyncedheads],
424 426 # [discardedheads])}
425 427 # - branch: the branch name
426 428 # - remoteheads: the list of remote heads known locally
427 429 # None if the branch is new
428 430 # - newheads: the new remote heads (known locally) with outgoing pushed
429 431 # - unsyncedheads: the list of remote heads unknown locally.
430 432 # - discardedheads: the list of remote heads made obsolete by the push
431 433 self.pushbranchmap = None
432 434 # testable as a boolean indicating if any nodes are missing locally.
433 435 self.incoming = None
434 436 # summary of the remote phase situation
435 437 self.remotephases = None
436 438 # phases changes that must be pushed along side the changesets
437 439 self.outdatedphases = None
438 440 # phases changes that must be pushed if changeset push fails
439 441 self.fallbackoutdatedphases = None
440 442 # outgoing obsmarkers
441 443 self.outobsmarkers = set()
442 444 # outgoing bookmarks
443 445 self.outbookmarks = []
444 446 # transaction manager
445 447 self.trmanager = None
446 448 # map { pushkey partid -> callback handling failure}
447 449 # used to handle exception from mandatory pushkey part failure
448 450 self.pkfailcb = {}
449 451 # an iterable of pushvars or None
450 452 self.pushvars = pushvars
451 453 # publish pushed changesets
452 454 self.publish = publish
453 455
454 456 @util.propertycache
455 457 def futureheads(self):
456 458 """future remote heads if the changeset push succeeds"""
457 459 return self.outgoing.missingheads
458 460
459 461 @util.propertycache
460 462 def fallbackheads(self):
461 463 """future remote heads if the changeset push fails"""
462 464 if self.revs is None:
463 465 # no target revs to push, all common heads are relevant
464 466 return self.outgoing.commonheads
465 467 unfi = self.repo.unfiltered()
466 468 # I want cheads = heads(::missingheads and ::commonheads)
467 469 # (missingheads is revs with secret changeset filtered out)
468 470 #
469 471 # This can be expressed as:
470 472 # cheads = ( (missingheads and ::commonheads)
471 473 # + (commonheads and ::missingheads))"
472 474 # )
473 475 #
474 476 # while trying to push we already computed the following:
475 477 # common = (::commonheads)
476 478 # missing = ((commonheads::missingheads) - commonheads)
477 479 #
478 480 # We can pick:
479 481 # * missingheads part of common (::commonheads)
480 482 common = self.outgoing.common
481 483 nm = self.repo.changelog.nodemap
482 484 cheads = [node for node in self.revs if nm[node] in common]
483 485 # and
484 486 # * commonheads parents on missing
485 487 revset = unfi.set('%ln and parents(roots(%ln))',
486 488 self.outgoing.commonheads,
487 489 self.outgoing.missing)
488 490 cheads.extend(c.node() for c in revset)
489 491 return cheads
490 492
491 493 @property
492 494 def commonheads(self):
493 495 """set of all common heads after changeset bundle push"""
494 496 if self.cgresult:
495 497 return self.futureheads
496 498 else:
497 499 return self.fallbackheads
498 500
499 501 # mapping of messages used when pushing bookmarks
500 502 bookmsgmap = {'update': (_("updating bookmark %s\n"),
501 503 _('updating bookmark %s failed!\n')),
502 504 'export': (_("exporting bookmark %s\n"),
503 505 _('exporting bookmark %s failed!\n')),
504 506 'delete': (_("deleting remote bookmark %s\n"),
505 507 _('deleting remote bookmark %s failed!\n')),
506 508 }
507 509
508 510
509 511 def push(repo, remote, force=False, revs=None, newbranch=False, bookmarks=(),
510 512 publish=False, opargs=None):
511 513 '''Push outgoing changesets (limited by revs) from a local
512 514 repository to remote. Return an integer:
513 515 - None means nothing to push
514 516 - 0 means HTTP error
515 517 - 1 means we pushed and remote head count is unchanged *or*
516 518 we have outgoing changesets but refused to push
517 519 - other values as described by addchangegroup()
518 520 '''
519 521 if opargs is None:
520 522 opargs = {}
521 523 pushop = pushoperation(repo, remote, force, revs, newbranch, bookmarks,
522 524 publish, **pycompat.strkwargs(opargs))
523 525 if pushop.remote.local():
524 526 missing = (set(pushop.repo.requirements)
525 527 - pushop.remote.local().supported)
526 528 if missing:
527 529 msg = _("required features are not"
528 530 " supported in the destination:"
529 531 " %s") % (', '.join(sorted(missing)))
530 532 raise error.Abort(msg)
531 533
532 534 if not pushop.remote.canpush():
533 535 raise error.Abort(_("destination does not support push"))
534 536
535 537 if not pushop.remote.capable('unbundle'):
536 538 raise error.Abort(_('cannot push: destination does not support the '
537 539 'unbundle wire protocol command'))
538 540
539 541 # get lock as we might write phase data
540 542 wlock = lock = None
541 543 try:
542 544 # bundle2 push may receive a reply bundle touching bookmarks
543 545 # requiring the wlock. Take it now to ensure proper ordering.
544 546 maypushback = pushop.ui.configbool('experimental', 'bundle2.pushback')
545 547 if ((not _forcebundle1(pushop)) and
546 548 maypushback and
547 549 not bookmod.bookmarksinstore(repo)):
548 550 wlock = pushop.repo.wlock()
549 551 lock = pushop.repo.lock()
550 552 pushop.trmanager = transactionmanager(pushop.repo,
551 553 'push-response',
552 554 pushop.remote.url())
553 555 except error.LockUnavailable as err:
554 556 # source repo cannot be locked.
555 557 # We do not abort the push, but just disable the local phase
556 558 # synchronisation.
557 559 msg = ('cannot lock source repository: %s\n'
558 560 % stringutil.forcebytestr(err))
559 561 pushop.ui.debug(msg)
560 562
561 563 with wlock or util.nullcontextmanager():
562 564 with lock or util.nullcontextmanager():
563 565 with pushop.trmanager or util.nullcontextmanager():
564 566 pushop.repo.checkpush(pushop)
565 567 _checkpublish(pushop)
566 568 _pushdiscovery(pushop)
567 569 if not _forcebundle1(pushop):
568 570 _pushbundle2(pushop)
569 571 _pushchangeset(pushop)
570 572 _pushsyncphase(pushop)
571 573 _pushobsolete(pushop)
572 574 _pushbookmark(pushop)
573 575
574 576 if repo.ui.configbool('experimental', 'remotenames'):
575 577 logexchange.pullremotenames(repo, remote)
576 578
577 579 return pushop
578 580
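A minimal, hypothetical in-process invocation (the peer URL and revision are placeholders, not part of this change):

    from mercurial import hg

    other = hg.peer(repo.ui, {}, b'https://example.com/repo')
    pushop = push(repo, other, revs=[repo[b'tip'].node()])
    if pushop.cgresult == 0:
        repo.ui.warn(b'push failed with an HTTP error\n')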
579 581 # list of steps to perform discovery before push
580 582 pushdiscoveryorder = []
581 583
582 584 # Mapping between step name and function
583 585 #
584 586 # This exists to help extensions wrap steps if necessary
585 587 pushdiscoverymapping = {}
586 588
587 589 def pushdiscovery(stepname):
588 590 """decorator for function performing discovery before push
589 591
590 592 The function is added to the step -> function mapping and appended to the
591 593 list of steps. Beware that decorated functions will be added in order (this
592 594 may matter).
593 595
594 596 You can only use this decorator for a new step; if you want to wrap a step
595 597 from an extension, change the pushdiscoverymapping dictionary directly."""
596 598 def dec(func):
597 599 assert stepname not in pushdiscoverymapping
598 600 pushdiscoverymapping[stepname] = func
599 601 pushdiscoveryorder.append(stepname)
600 602 return func
601 603 return dec
602 604
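For instance, an extension could register an additional discovery step roughly like this (step name and attribute are hypothetical):

    @pushdiscovery('mystep')
    def _pushdiscoverymystep(pushop):
        # stash data for a later bundle2 part generator to pick up
        pushop.mystepdata = pushop.repo.ui.config('myext', 'something')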
603 605 def _pushdiscovery(pushop):
604 606 """Run all discovery steps"""
605 607 for stepname in pushdiscoveryorder:
606 608 step = pushdiscoverymapping[stepname]
607 609 step(pushop)
608 610
609 611 @pushdiscovery('changeset')
610 612 def _pushdiscoverychangeset(pushop):
611 613 """discover the changeset that need to be pushed"""
612 614 fci = discovery.findcommonincoming
613 615 if pushop.revs:
614 616 commoninc = fci(pushop.repo, pushop.remote, force=pushop.force,
615 617 ancestorsof=pushop.revs)
616 618 else:
617 619 commoninc = fci(pushop.repo, pushop.remote, force=pushop.force)
618 620 common, inc, remoteheads = commoninc
619 621 fco = discovery.findcommonoutgoing
620 622 outgoing = fco(pushop.repo, pushop.remote, onlyheads=pushop.revs,
621 623 commoninc=commoninc, force=pushop.force)
622 624 pushop.outgoing = outgoing
623 625 pushop.remoteheads = remoteheads
624 626 pushop.incoming = inc
625 627
626 628 @pushdiscovery('phase')
627 629 def _pushdiscoveryphase(pushop):
628 630 """discover the phase that needs to be pushed
629 631
630 632 (computed for both success and failure case for changesets push)"""
631 633 outgoing = pushop.outgoing
632 634 unfi = pushop.repo.unfiltered()
633 635 remotephases = listkeys(pushop.remote, 'phases')
634 636
635 637 if (pushop.ui.configbool('ui', '_usedassubrepo')
636 638 and remotephases # server supports phases
637 639 and not pushop.outgoing.missing # no changesets to be pushed
638 640 and remotephases.get('publishing', False)):
639 641 # When:
640 642 # - this is a subrepo push
641 643 # - and remote support phase
642 644 # - and no changeset are to be pushed
643 645 # - and remote is publishing
644 646 # We may be in issue 3781 case!
645 647 # We drop the possible phase synchronisation done by
646 648 # courtesy to publish changesets possibly locally draft
647 649 # on the remote.
648 650 pushop.outdatedphases = []
649 651 pushop.fallbackoutdatedphases = []
650 652 return
651 653
652 654 pushop.remotephases = phases.remotephasessummary(pushop.repo,
653 655 pushop.fallbackheads,
654 656 remotephases)
655 657 droots = pushop.remotephases.draftroots
656 658
657 659 extracond = ''
658 660 if not pushop.remotephases.publishing:
659 661 extracond = ' and public()'
660 662 revset = 'heads((%%ln::%%ln) %s)' % extracond
661 663 # Get the list of all revs draft on remote by public here.
662 664 # XXX Beware that the revset breaks if droots is not strictly
663 665 # XXX roots; we may want to ensure it is, but that is costly
664 666 fallback = list(unfi.set(revset, droots, pushop.fallbackheads))
665 667 if not pushop.remotephases.publishing and pushop.publish:
666 668 future = list(unfi.set('%ln and (not public() or %ln::)',
667 669 pushop.futureheads, droots))
668 670 elif not outgoing.missing:
669 671 future = fallback
670 672 else:
671 673 # adds changeset we are going to push as draft
672 674 #
673 675 # should not be necessary for publishing server, but because of an
674 676 # issue fixed in xxxxx we have to do it anyway.
675 677 fdroots = list(unfi.set('roots(%ln + %ln::)',
676 678 outgoing.missing, droots))
677 679 fdroots = [f.node() for f in fdroots]
678 680 future = list(unfi.set(revset, fdroots, pushop.futureheads))
679 681 pushop.outdatedphases = future
680 682 pushop.fallbackoutdatedphases = fallback
681 683
682 684 @pushdiscovery('obsmarker')
683 685 def _pushdiscoveryobsmarkers(pushop):
684 686 if not obsolete.isenabled(pushop.repo, obsolete.exchangeopt):
685 687 return
686 688
687 689 if not pushop.repo.obsstore:
688 690 return
689 691
690 692 if 'obsolete' not in listkeys(pushop.remote, 'namespaces'):
691 693 return
692 694
693 695 repo = pushop.repo
694 696 # very naive computation, that can be quite expensive on big repo.
695 697 # However: evolution is currently slow on them anyway.
696 698 nodes = (c.node() for c in repo.set('::%ln', pushop.futureheads))
697 699 pushop.outobsmarkers = pushop.repo.obsstore.relevantmarkers(nodes)
698 700
699 701 @pushdiscovery('bookmarks')
700 702 def _pushdiscoverybookmarks(pushop):
701 703 ui = pushop.ui
702 704 repo = pushop.repo.unfiltered()
703 705 remote = pushop.remote
704 706 ui.debug("checking for updated bookmarks\n")
705 707 ancestors = ()
706 708 if pushop.revs:
707 709 revnums = pycompat.maplist(repo.changelog.rev, pushop.revs)
708 710 ancestors = repo.changelog.ancestors(revnums, inclusive=True)
709 711
710 712 remotebookmark = listkeys(remote, 'bookmarks')
711 713
712 714 explicit = {repo._bookmarks.expandname(bookmark)
713 715 for bookmark in pushop.bookmarks}
714 716
715 717 remotebookmark = bookmod.unhexlifybookmarks(remotebookmark)
716 718 comp = bookmod.comparebookmarks(repo, repo._bookmarks, remotebookmark)
717 719
718 720 def safehex(x):
719 721 if x is None:
720 722 return x
721 723 return hex(x)
722 724
723 725 def hexifycompbookmarks(bookmarks):
724 726 return [(b, safehex(scid), safehex(dcid))
725 727 for (b, scid, dcid) in bookmarks]
726 728
727 729 comp = [hexifycompbookmarks(marks) for marks in comp]
728 730 return _processcompared(pushop, ancestors, explicit, remotebookmark, comp)
729 731
730 732 def _processcompared(pushop, pushed, explicit, remotebms, comp):
731 733 """decide which bookmark changes to push to the remote, based on the comparison
732 734
733 735 Exists to help extensions that want to alter this behavior.
734 736 """
735 737 addsrc, adddst, advsrc, advdst, diverge, differ, invalid, same = comp
736 738
737 739 repo = pushop.repo
738 740
739 741 for b, scid, dcid in advsrc:
740 742 if b in explicit:
741 743 explicit.remove(b)
742 744 if not pushed or repo[scid].rev() in pushed:
743 745 pushop.outbookmarks.append((b, dcid, scid))
744 746 # search added bookmark
745 747 for b, scid, dcid in addsrc:
746 748 if b in explicit:
747 749 explicit.remove(b)
748 750 pushop.outbookmarks.append((b, '', scid))
749 751 # search for overwritten bookmark
750 752 for b, scid, dcid in list(advdst) + list(diverge) + list(differ):
751 753 if b in explicit:
752 754 explicit.remove(b)
753 755 pushop.outbookmarks.append((b, dcid, scid))
754 756 # search for bookmark to delete
755 757 for b, scid, dcid in adddst:
756 758 if b in explicit:
757 759 explicit.remove(b)
758 760 # treat as "deleted locally"
759 761 pushop.outbookmarks.append((b, dcid, ''))
760 762 # identical bookmarks shouldn't get reported
761 763 for b, scid, dcid in same:
762 764 if b in explicit:
763 765 explicit.remove(b)
764 766
765 767 if explicit:
766 768 explicit = sorted(explicit)
767 769 # we should probably list all of them
768 770 pushop.ui.warn(_('bookmark %s does not exist on the local '
769 771 'or remote repository!\n') % explicit[0])
770 772 pushop.bkresult = 2
771 773
772 774 pushop.outbookmarks.sort()
773 775
774 776 def _pushcheckoutgoing(pushop):
775 777 outgoing = pushop.outgoing
776 778 unfi = pushop.repo.unfiltered()
777 779 if not outgoing.missing:
778 780 # nothing to push
779 781 scmutil.nochangesfound(unfi.ui, unfi, outgoing.excluded)
780 782 return False
781 783 # something to push
782 784 if not pushop.force:
783 785 # if repo.obsstore == False --> no obsolete
784 786 # then, save the iteration
785 787 if unfi.obsstore:
786 788 # these messages are here for 80 char limit reasons
787 789 mso = _("push includes obsolete changeset: %s!")
788 790 mspd = _("push includes phase-divergent changeset: %s!")
789 791 mscd = _("push includes content-divergent changeset: %s!")
790 792 mst = {"orphan": _("push includes orphan changeset: %s!"),
791 793 "phase-divergent": mspd,
792 794 "content-divergent": mscd}
793 795 # If we are about to push and there is at least one
794 796 # obsolete or unstable changeset in missing, at
795 797 # least one of the missing heads will be obsolete or
796 798 # unstable. So checking heads only is ok
797 799 for node in outgoing.missingheads:
798 800 ctx = unfi[node]
799 801 if ctx.obsolete():
800 802 raise error.Abort(mso % ctx)
801 803 elif ctx.isunstable():
802 804 # TODO print more than one instability in the abort
803 805 # message
804 806 raise error.Abort(mst[ctx.instabilities()[0]] % ctx)
805 807
806 808 discovery.checkheads(pushop)
807 809 return True
808 810
809 811 # List of names of steps to perform for an outgoing bundle2, order matters.
810 812 b2partsgenorder = []
811 813
812 814 # Mapping between step name and function
813 815 #
814 816 # This exists to help extensions wrap steps if necessary
815 817 b2partsgenmapping = {}
816 818
817 819 def b2partsgenerator(stepname, idx=None):
818 820 """decorator for function generating bundle2 part
819 821
820 822 The function is added to the step -> function mapping and appended to the
821 823 list of steps. Beware that decorated functions will be added in order
822 824 (this may matter).
823 825
824 826 You can only use this decorator for new steps; if you want to wrap a step
825 827 from an extension, change the b2partsgenmapping dictionary directly."""
826 828 def dec(func):
827 829 assert stepname not in b2partsgenmapping
828 830 b2partsgenmapping[stepname] = func
829 831 if idx is None:
830 832 b2partsgenorder.append(stepname)
831 833 else:
832 834 b2partsgenorder.insert(idx, stepname)
833 835 return func
834 836 return dec
835 837
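Analogously to the discovery decorator, an extension might register its own part generator along these lines (part name and payload are hypothetical; the server would also need to understand the part):

    @b2partsgenerator('my-part')
    def _pushb2mypart(pushop, bundler):
        if 'my-part' in pushop.stepsdone:
            return
        pushop.stepsdone.add('my-part')
        bundler.newpart('my-part', data=b'payload')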
836 838 def _pushb2ctxcheckheads(pushop, bundler):
837 839 """Generate race condition checking parts
838 840
839 841 Exists as an independent function to aid extensions
840 842 """
841 843 # * 'force' does not check for a push race,
842 844 # * if we don't push anything, there is nothing to check.
843 845 if not pushop.force and pushop.outgoing.missingheads:
844 846 allowunrelated = 'related' in bundler.capabilities.get('checkheads', ())
845 847 emptyremote = pushop.pushbranchmap is None
846 848 if not allowunrelated or emptyremote:
847 849 bundler.newpart('check:heads', data=iter(pushop.remoteheads))
848 850 else:
849 851 affected = set()
850 852 for branch, heads in pushop.pushbranchmap.iteritems():
851 853 remoteheads, newheads, unsyncedheads, discardedheads = heads
852 854 if remoteheads is not None:
853 855 remote = set(remoteheads)
854 856 affected |= set(discardedheads) & remote
855 857 affected |= remote - set(newheads)
856 858 if affected:
857 859 data = iter(sorted(affected))
858 860 bundler.newpart('check:updated-heads', data=data)
859 861
860 862 def _pushing(pushop):
861 863 """return True if we are pushing anything"""
862 864 return bool(pushop.outgoing.missing
863 865 or pushop.outdatedphases
864 866 or pushop.outobsmarkers
865 867 or pushop.outbookmarks)
866 868
867 869 @b2partsgenerator('check-bookmarks')
868 870 def _pushb2checkbookmarks(pushop, bundler):
869 871 """insert bookmark move checking"""
870 872 if not _pushing(pushop) or pushop.force:
871 873 return
872 874 b2caps = bundle2.bundle2caps(pushop.remote)
873 875 hasbookmarkcheck = 'bookmarks' in b2caps
874 876 if not (pushop.outbookmarks and hasbookmarkcheck):
875 877 return
876 878 data = []
877 879 for book, old, new in pushop.outbookmarks:
878 880 old = bin(old)
879 881 data.append((book, old))
880 882 checkdata = bookmod.binaryencode(data)
881 883 bundler.newpart('check:bookmarks', data=checkdata)
882 884
883 885 @b2partsgenerator('check-phases')
884 886 def _pushb2checkphases(pushop, bundler):
885 887 """insert phase move checking"""
886 888 if not _pushing(pushop) or pushop.force:
887 889 return
888 890 b2caps = bundle2.bundle2caps(pushop.remote)
889 891 hasphaseheads = 'heads' in b2caps.get('phases', ())
890 892 if pushop.remotephases is not None and hasphaseheads:
891 893 # check that the remote phase has not changed
892 894 checks = [[] for p in phases.allphases]
893 895 checks[phases.public].extend(pushop.remotephases.publicheads)
894 896 checks[phases.draft].extend(pushop.remotephases.draftroots)
895 897 if any(checks):
896 898 for nodes in checks:
897 899 nodes.sort()
898 900 checkdata = phases.binaryencode(checks)
899 901 bundler.newpart('check:phases', data=checkdata)
900 902
901 903 @b2partsgenerator('changeset')
902 904 def _pushb2ctx(pushop, bundler):
903 905 """handle changegroup push through bundle2
904 906
905 907 addchangegroup result is stored in the ``pushop.cgresult`` attribute.
906 908 """
907 909 if 'changesets' in pushop.stepsdone:
908 910 return
909 911 pushop.stepsdone.add('changesets')
910 912 # Send known heads to the server for race detection.
911 913 if not _pushcheckoutgoing(pushop):
912 914 return
913 915 pushop.repo.prepushoutgoinghooks(pushop)
914 916
915 917 _pushb2ctxcheckheads(pushop, bundler)
916 918
917 919 b2caps = bundle2.bundle2caps(pushop.remote)
918 920 version = '01'
919 921 cgversions = b2caps.get('changegroup')
920 922 if cgversions: # 3.1 and 3.2 ship with an empty value
921 923 cgversions = [v for v in cgversions
922 924 if v in changegroup.supportedoutgoingversions(
923 925 pushop.repo)]
924 926 if not cgversions:
925 927 raise error.Abort(_('no common changegroup version'))
926 928 version = max(cgversions)
927 929 cgstream = changegroup.makestream(pushop.repo, pushop.outgoing, version,
928 930 'push')
929 931 cgpart = bundler.newpart('changegroup', data=cgstream)
930 932 if cgversions:
931 933 cgpart.addparam('version', version)
932 934 if 'treemanifest' in pushop.repo.requirements:
933 935 cgpart.addparam('treemanifest', '1')
934 936 def handlereply(op):
935 937 """extract addchangegroup returns from server reply"""
936 938 cgreplies = op.records.getreplies(cgpart.id)
937 939 assert len(cgreplies['changegroup']) == 1
938 940 pushop.cgresult = cgreplies['changegroup'][0]['return']
939 941 return handlereply
940 942
941 943 @b2partsgenerator('phase')
942 944 def _pushb2phases(pushop, bundler):
943 945 """handle phase push through bundle2"""
944 946 if 'phases' in pushop.stepsdone:
945 947 return
946 948 b2caps = bundle2.bundle2caps(pushop.remote)
947 949 ui = pushop.repo.ui
948 950
949 951 legacyphase = 'phases' in ui.configlist('devel', 'legacy.exchange')
950 952 haspushkey = 'pushkey' in b2caps
951 953 hasphaseheads = 'heads' in b2caps.get('phases', ())
952 954
953 955 if hasphaseheads and not legacyphase:
954 956 return _pushb2phaseheads(pushop, bundler)
955 957 elif haspushkey:
956 958 return _pushb2phasespushkey(pushop, bundler)
957 959
958 960 def _pushb2phaseheads(pushop, bundler):
959 961 """push phase information through a bundle2 - binary part"""
960 962 pushop.stepsdone.add('phases')
961 963 if pushop.outdatedphases:
962 964 updates = [[] for p in phases.allphases]
963 965 updates[0].extend(h.node() for h in pushop.outdatedphases)
964 966 phasedata = phases.binaryencode(updates)
965 967 bundler.newpart('phase-heads', data=phasedata)
966 968
967 969 def _pushb2phasespushkey(pushop, bundler):
968 970 """push phase information through a bundle2 - pushkey part"""
969 971 pushop.stepsdone.add('phases')
970 972 part2node = []
971 973
972 974 def handlefailure(pushop, exc):
973 975 targetid = int(exc.partid)
974 976 for partid, node in part2node:
975 977 if partid == targetid:
976 978 raise error.Abort(_('updating %s to public failed') % node)
977 979
978 980 enc = pushkey.encode
979 981 for newremotehead in pushop.outdatedphases:
980 982 part = bundler.newpart('pushkey')
981 983 part.addparam('namespace', enc('phases'))
982 984 part.addparam('key', enc(newremotehead.hex()))
983 985 part.addparam('old', enc('%d' % phases.draft))
984 986 part.addparam('new', enc('%d' % phases.public))
985 987 part2node.append((part.id, newremotehead))
986 988 pushop.pkfailcb[part.id] = handlefailure
987 989
988 990 def handlereply(op):
989 991 for partid, node in part2node:
990 992 partrep = op.records.getreplies(partid)
991 993 results = partrep['pushkey']
992 994 assert len(results) <= 1
993 995 msg = None
994 996 if not results:
995 997 msg = _('server ignored update of %s to public!\n') % node
996 998 elif not int(results[0]['return']):
997 999 msg = _('updating %s to public failed!\n') % node
998 1000 if msg is not None:
999 1001 pushop.ui.warn(msg)
1000 1002 return handlereply
1001 1003
1002 1004 @b2partsgenerator('obsmarkers')
1003 1005 def _pushb2obsmarkers(pushop, bundler):
1004 1006 if 'obsmarkers' in pushop.stepsdone:
1005 1007 return
1006 1008 remoteversions = bundle2.obsmarkersversion(bundler.capabilities)
1007 1009 if obsolete.commonversion(remoteversions) is None:
1008 1010 return
1009 1011 pushop.stepsdone.add('obsmarkers')
1010 1012 if pushop.outobsmarkers:
1011 1013 markers = sorted(pushop.outobsmarkers)
1012 1014 bundle2.buildobsmarkerspart(bundler, markers)
1013 1015
1014 1016 @b2partsgenerator('bookmarks')
1015 1017 def _pushb2bookmarks(pushop, bundler):
1016 1018 """handle bookmark push through bundle2"""
1017 1019 if 'bookmarks' in pushop.stepsdone:
1018 1020 return
1019 1021 b2caps = bundle2.bundle2caps(pushop.remote)
1020 1022
1021 1023 legacy = pushop.repo.ui.configlist('devel', 'legacy.exchange')
1022 1024 legacybooks = 'bookmarks' in legacy
1023 1025
1024 1026 if not legacybooks and 'bookmarks' in b2caps:
1025 1027 return _pushb2bookmarkspart(pushop, bundler)
1026 1028 elif 'pushkey' in b2caps:
1027 1029 return _pushb2bookmarkspushkey(pushop, bundler)
1028 1030
1029 1031 def _bmaction(old, new):
1030 1032 """small utility for bookmark pushing"""
1031 1033 if not old:
1032 1034 return 'export'
1033 1035 elif not new:
1034 1036 return 'delete'
1035 1037 return 'update'
1036 1038
1037 1039 def _pushb2bookmarkspart(pushop, bundler):
1038 1040 pushop.stepsdone.add('bookmarks')
1039 1041 if not pushop.outbookmarks:
1040 1042 return
1041 1043
1042 1044 allactions = []
1043 1045 data = []
1044 1046 for book, old, new in pushop.outbookmarks:
1045 1047 new = bin(new)
1046 1048 data.append((book, new))
1047 1049 allactions.append((book, _bmaction(old, new)))
1048 1050 checkdata = bookmod.binaryencode(data)
1049 1051 bundler.newpart('bookmarks', data=checkdata)
1050 1052
1051 1053 def handlereply(op):
1052 1054 ui = pushop.ui
1053 1055 # if success
1054 1056 for book, action in allactions:
1055 1057 ui.status(bookmsgmap[action][0] % book)
1056 1058
1057 1059 return handlereply
1058 1060
1059 1061 def _pushb2bookmarkspushkey(pushop, bundler):
1060 1062 pushop.stepsdone.add('bookmarks')
1061 1063 part2book = []
1062 1064 enc = pushkey.encode
1063 1065
1064 1066 def handlefailure(pushop, exc):
1065 1067 targetid = int(exc.partid)
1066 1068 for partid, book, action in part2book:
1067 1069 if partid == targetid:
1068 1070 raise error.Abort(bookmsgmap[action][1].rstrip() % book)
1069 1071 # we should not be called for parts we did not generate
1070 1072 assert False
1071 1073
1072 1074 for book, old, new in pushop.outbookmarks:
1073 1075 part = bundler.newpart('pushkey')
1074 1076 part.addparam('namespace', enc('bookmarks'))
1075 1077 part.addparam('key', enc(book))
1076 1078 part.addparam('old', enc(old))
1077 1079 part.addparam('new', enc(new))
1078 1080 action = 'update'
1079 1081 if not old:
1080 1082 action = 'export'
1081 1083 elif not new:
1082 1084 action = 'delete'
1083 1085 part2book.append((part.id, book, action))
1084 1086 pushop.pkfailcb[part.id] = handlefailure
1085 1087
1086 1088 def handlereply(op):
1087 1089 ui = pushop.ui
1088 1090 for partid, book, action in part2book:
1089 1091 partrep = op.records.getreplies(partid)
1090 1092 results = partrep['pushkey']
1091 1093 assert len(results) <= 1
1092 1094 if not results:
1093 1095 pushop.ui.warn(_('server ignored bookmark %s update\n') % book)
1094 1096 else:
1095 1097 ret = int(results[0]['return'])
1096 1098 if ret:
1097 1099 ui.status(bookmsgmap[action][0] % book)
1098 1100 else:
1099 1101 ui.warn(bookmsgmap[action][1] % book)
1100 1102 if pushop.bkresult is not None:
1101 1103 pushop.bkresult = 1
1102 1104 return handlereply
1103 1105
1104 1106 @b2partsgenerator('pushvars', idx=0)
1105 1107 def _getbundlesendvars(pushop, bundler):
1106 1108 '''send shellvars via bundle2'''
1107 1109 pushvars = pushop.pushvars
1108 1110 if pushvars:
1109 1111 shellvars = {}
1110 1112 for raw in pushvars:
1111 1113 if '=' not in raw:
1112 1114 msg = ("unable to parse variable '%s', should follow "
1113 1115 "'KEY=VALUE' or 'KEY=' format")
1114 1116 raise error.Abort(msg % raw)
1115 1117 k, v = raw.split('=', 1)
1116 1118 shellvars[k] = v
1117 1119
1118 1120 part = bundler.newpart('pushvars')
1119 1121
1120 1122 for key, value in shellvars.iteritems():
1121 1123 part.addparam(key, value, mandatory=False)
1122 1124
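# Editor's sketch (not part of this change): the strings handed to
# pushop.pushvars above are the raw 'KEY=VALUE' arguments from
# `hg push --pushvars KEY=VALUE`; a standalone version of the parsing,
# handy for testing, could look like this (helper name is made up):

def _example_parsepushvars(rawvars):
    """Turn ['KEY=VALUE', ...] into a dict, aborting on malformed entries."""
    shellvars = {}
    for raw in rawvars:
        if '=' not in raw:
            msg = ("unable to parse variable '%s', should follow "
                   "'KEY=VALUE' or 'KEY=' format")
            raise error.Abort(msg % raw)
        k, v = raw.split('=', 1)
        shellvars[k] = v
    return shellvars

# e.g. _example_parsepushvars(['DEBUG=1', 'REASON=']) -> {'DEBUG': '1', 'REASON': ''}
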
1123 1125 def _pushbundle2(pushop):
1124 1126 """push data to the remote using bundle2
1125 1127
1126 1128 The only currently supported type of data is changegroup but this will
1127 1129 evolve in the future."""
1128 1130 bundler = bundle2.bundle20(pushop.ui, bundle2.bundle2caps(pushop.remote))
1129 1131 pushback = (pushop.trmanager
1130 1132 and pushop.ui.configbool('experimental', 'bundle2.pushback'))
1131 1133
1132 1134 # create reply capability
1133 1135 capsblob = bundle2.encodecaps(bundle2.getrepocaps(pushop.repo,
1134 1136 allowpushback=pushback,
1135 1137 role='client'))
1136 1138 bundler.newpart('replycaps', data=capsblob)
1137 1139 replyhandlers = []
1138 1140 for partgenname in b2partsgenorder:
1139 1141 partgen = b2partsgenmapping[partgenname]
1140 1142 ret = partgen(pushop, bundler)
1141 1143 if callable(ret):
1142 1144 replyhandlers.append(ret)
1143 1145 # do not push if nothing to push
1144 1146 if bundler.nbparts <= 1:
1145 1147 return
1146 1148 stream = util.chunkbuffer(bundler.getchunks())
1147 1149 try:
1148 1150 try:
1149 1151 with pushop.remote.commandexecutor() as e:
1150 1152 reply = e.callcommand('unbundle', {
1151 1153 'bundle': stream,
1152 1154 'heads': ['force'],
1153 1155 'url': pushop.remote.url(),
1154 1156 }).result()
1155 1157 except error.BundleValueError as exc:
1156 1158 raise error.Abort(_('missing support for %s') % exc)
1157 1159 try:
1158 1160 trgetter = None
1159 1161 if pushback:
1160 1162 trgetter = pushop.trmanager.transaction
1161 1163 op = bundle2.processbundle(pushop.repo, reply, trgetter)
1162 1164 except error.BundleValueError as exc:
1163 1165 raise error.Abort(_('missing support for %s') % exc)
1164 1166 except bundle2.AbortFromPart as exc:
1165 1167 pushop.ui.status(_('remote: %s\n') % exc)
1166 1168 if exc.hint is not None:
1167 1169 pushop.ui.status(_('remote: %s\n') % ('(%s)' % exc.hint))
1168 1170 raise error.Abort(_('push failed on remote'))
1169 1171 except error.PushkeyFailed as exc:
1170 1172 partid = int(exc.partid)
1171 1173 if partid not in pushop.pkfailcb:
1172 1174 raise
1173 1175 pushop.pkfailcb[partid](pushop, exc)
1174 1176 for rephand in replyhandlers:
1175 1177 rephand(op)
1176 1178
1177 1179 def _pushchangeset(pushop):
1178 1180 """Make the actual push of changeset bundle to remote repo"""
1179 1181 if 'changesets' in pushop.stepsdone:
1180 1182 return
1181 1183 pushop.stepsdone.add('changesets')
1182 1184 if not _pushcheckoutgoing(pushop):
1183 1185 return
1184 1186
1185 1187 # Should have verified this in push().
1186 1188 assert pushop.remote.capable('unbundle')
1187 1189
1188 1190 pushop.repo.prepushoutgoinghooks(pushop)
1189 1191 outgoing = pushop.outgoing
1190 1192 # TODO: get bundlecaps from remote
1191 1193 bundlecaps = None
1192 1194 # create a changegroup from local
1193 1195 if pushop.revs is None and not (outgoing.excluded
1194 1196 or pushop.repo.changelog.filteredrevs):
1195 1197 # push everything,
1196 1198 # use the fast path, no race possible on push
1197 1199 cg = changegroup.makechangegroup(pushop.repo, outgoing, '01', 'push',
1198 1200 fastpath=True, bundlecaps=bundlecaps)
1199 1201 else:
1200 1202 cg = changegroup.makechangegroup(pushop.repo, outgoing, '01',
1201 1203 'push', bundlecaps=bundlecaps)
1202 1204
1203 1205 # apply changegroup to remote
1204 1206 # local repo finds heads on server, finds out what
1205 1207 # revs it must push. once revs transferred, if server
1206 1208 # finds it has different heads (someone else won
1207 1209 # commit/push race), server aborts.
1208 1210 if pushop.force:
1209 1211 remoteheads = ['force']
1210 1212 else:
1211 1213 remoteheads = pushop.remoteheads
1212 1214 # ssh: return remote's addchangegroup()
1213 1215 # http: return remote's addchangegroup() or 0 for error
1214 1216 pushop.cgresult = pushop.remote.unbundle(cg, remoteheads,
1215 1217 pushop.repo.url())
1216 1218
1217 1219 def _pushsyncphase(pushop):
1218 1220 """synchronise phase information locally and remotely"""
1219 1221 cheads = pushop.commonheads
1220 1222 # even when we don't push, exchanging phase data is useful
1221 1223 remotephases = listkeys(pushop.remote, 'phases')
1222 1224 if (pushop.ui.configbool('ui', '_usedassubrepo')
1223 1225 and remotephases # server supports phases
1224 1226 and pushop.cgresult is None # nothing was pushed
1225 1227 and remotephases.get('publishing', False)):
1226 1228 # When:
1227 1229 # - this is a subrepo push
1228 1230 # - and remote supports phases
1229 1231 # - and no changeset was pushed
1230 1232 # - and remote is publishing
1231 1233 # We may be in the issue 3871 case!
1232 1234 # We drop the possible phase synchronisation done as a
1233 1235 # courtesy to publish changesets possibly locally draft
1234 1236 # on the remote.
1235 1237 remotephases = {'publishing': 'True'}
1236 1238 if not remotephases: # old server or public only reply from non-publishing
1237 1239 _localphasemove(pushop, cheads)
1238 1240 # don't push any phase data as there is nothing to push
1239 1241 else:
1240 1242 ana = phases.analyzeremotephases(pushop.repo, cheads,
1241 1243 remotephases)
1242 1244 pheads, droots = ana
1243 1245 ### Apply remote phase on local
1244 1246 if remotephases.get('publishing', False):
1245 1247 _localphasemove(pushop, cheads)
1246 1248 else: # publish = False
1247 1249 _localphasemove(pushop, pheads)
1248 1250 _localphasemove(pushop, cheads, phases.draft)
1249 1251 ### Apply local phase on remote
1250 1252
1251 1253 if pushop.cgresult:
1252 1254 if 'phases' in pushop.stepsdone:
1253 1255 # phases already pushed though bundle2
1254 1256 return
1255 1257 outdated = pushop.outdatedphases
1256 1258 else:
1257 1259 outdated = pushop.fallbackoutdatedphases
1258 1260
1259 1261 pushop.stepsdone.add('phases')
1260 1262
1261 1263 # filter heads already turned public by the push
1262 1264 outdated = [c for c in outdated if c.node() not in pheads]
1263 1265 # fallback to independent pushkey command
1264 1266 for newremotehead in outdated:
1265 1267 with pushop.remote.commandexecutor() as e:
1266 1268 r = e.callcommand('pushkey', {
1267 1269 'namespace': 'phases',
1268 1270 'key': newremotehead.hex(),
1269 1271 'old': '%d' % phases.draft,
1270 1272 'new': '%d' % phases.public
1271 1273 }).result()
1272 1274
1273 1275 if not r:
1274 1276 pushop.ui.warn(_('updating %s to public failed!\n')
1275 1277 % newremotehead)
1276 1278
1277 1279 def _localphasemove(pushop, nodes, phase=phases.public):
1278 1280 """move <nodes> to <phase> in the local source repo"""
1279 1281 if pushop.trmanager:
1280 1282 phases.advanceboundary(pushop.repo,
1281 1283 pushop.trmanager.transaction(),
1282 1284 phase,
1283 1285 nodes)
1284 1286 else:
1285 1287 # repo is not locked, do not change any phases!
1286 1288 # Informs the user that phases should have been moved when
1287 1289 # applicable.
1288 1290 actualmoves = [n for n in nodes if phase < pushop.repo[n].phase()]
1289 1291 phasestr = phases.phasenames[phase]
1290 1292 if actualmoves:
1291 1293 pushop.ui.status(_('cannot lock source repo, skipping '
1292 1294 'local %s phase update\n') % phasestr)
1293 1295
1294 1296 def _pushobsolete(pushop):
1295 1297 """utility function to push obsolete markers to a remote"""
1296 1298 if 'obsmarkers' in pushop.stepsdone:
1297 1299 return
1298 1300 repo = pushop.repo
1299 1301 remote = pushop.remote
1300 1302 pushop.stepsdone.add('obsmarkers')
1301 1303 if pushop.outobsmarkers:
1302 1304 pushop.ui.debug('try to push obsolete markers to remote\n')
1303 1305 rslts = []
1304 1306 remotedata = obsolete._pushkeyescape(sorted(pushop.outobsmarkers))
1305 1307 for key in sorted(remotedata, reverse=True):
1306 1308 # reverse sort to ensure we end with dump0
1307 1309 data = remotedata[key]
1308 1310 rslts.append(remote.pushkey('obsolete', key, '', data))
1309 1311 if [r for r in rslts if not r]:
1310 1312 msg = _('failed to push some obsolete markers!\n')
1311 1313 repo.ui.warn(msg)
1312 1314
1313 1315 def _pushbookmark(pushop):
1314 1316 """Update bookmark position on remote"""
1315 1317 if pushop.cgresult == 0 or 'bookmarks' in pushop.stepsdone:
1316 1318 return
1317 1319 pushop.stepsdone.add('bookmarks')
1318 1320 ui = pushop.ui
1319 1321 remote = pushop.remote
1320 1322
1321 1323 for b, old, new in pushop.outbookmarks:
1322 1324 action = 'update'
1323 1325 if not old:
1324 1326 action = 'export'
1325 1327 elif not new:
1326 1328 action = 'delete'
1327 1329
1328 1330 with remote.commandexecutor() as e:
1329 1331 r = e.callcommand('pushkey', {
1330 1332 'namespace': 'bookmarks',
1331 1333 'key': b,
1332 1334 'old': old,
1333 1335 'new': new,
1334 1336 }).result()
1335 1337
1336 1338 if r:
1337 1339 ui.status(bookmsgmap[action][0] % b)
1338 1340 else:
1339 1341 ui.warn(bookmsgmap[action][1] % b)
1340 1342 # discovery can have set the value from an invalid entry
1341 1343 if pushop.bkresult is not None:
1342 1344 pushop.bkresult = 1
1343 1345
1344 1346 class pulloperation(object):
1345 1347 """A object that represent a single pull operation
1346 1348 """An object that represents a single pull operation
1347 1349 It purpose is to carry pull related state and very common operation.
1348 1350
1349 1351 A new should be created at the beginning of each pull and discarded
1350 1352 afterward.
1351 1353 """
1352 1354
1353 1355 def __init__(self, repo, remote, heads=None, force=False, bookmarks=(),
1354 1356 remotebookmarks=None, streamclonerequested=None,
1355 1357 includepats=None, excludepats=None, depth=None):
1356 1358 # repo we pull into
1357 1359 self.repo = repo
1358 1360 # repo we pull from
1359 1361 self.remote = remote
1360 1362 # revision we try to pull (None is "all")
1361 1363 self.heads = heads
1362 1364 # bookmarks pulled explicitly
1363 1365 self.explicitbookmarks = [repo._bookmarks.expandname(bookmark)
1364 1366 for bookmark in bookmarks]
1365 1367 # do we force pull?
1366 1368 self.force = force
1367 1369 # whether a streaming clone was requested
1368 1370 self.streamclonerequested = streamclonerequested
1369 1371 # transaction manager
1370 1372 self.trmanager = None
1371 1373 # set of common changesets between local and remote before pull
1372 1374 self.common = None
1373 1375 # set of pulled heads
1374 1376 self.rheads = None
1375 1377 # list of missing changesets to fetch remotely
1376 1378 self.fetch = None
1377 1379 # remote bookmarks data
1378 1380 self.remotebookmarks = remotebookmarks
1379 1381 # result of changegroup pulling (used as return code by pull)
1380 1382 self.cgresult = None
1381 1383 # list of steps already done
1382 1384 self.stepsdone = set()
1383 1385 # Whether we attempted a clone from pre-generated bundles.
1384 1386 self.clonebundleattempted = False
1385 1387 # Set of file patterns to include.
1386 1388 self.includepats = includepats
1387 1389 # Set of file patterns to exclude.
1388 1390 self.excludepats = excludepats
1389 1391 # Number of ancestor changesets to pull from each pulled head.
1390 1392 self.depth = depth
1391 1393
1392 1394 @util.propertycache
1393 1395 def pulledsubset(self):
1394 1396 """heads of the set of changeset target by the pull"""
1395 1397 # compute target subset
1396 1398 if self.heads is None:
1397 1399 # We pulled everything possible
1398 1400 # sync on everything common
1399 1401 c = set(self.common)
1400 1402 ret = list(self.common)
1401 1403 for n in self.rheads:
1402 1404 if n not in c:
1403 1405 ret.append(n)
1404 1406 return ret
1405 1407 else:
1406 1408 # We pulled a specific subset
1407 1409 # sync on this subset
1408 1410 return self.heads
1409 1411
1410 1412 @util.propertycache
1411 1413 def canusebundle2(self):
1412 1414 return not _forcebundle1(self)
1413 1415
1414 1416 @util.propertycache
1415 1417 def remotebundle2caps(self):
1416 1418 return bundle2.bundle2caps(self.remote)
1417 1419
1418 1420 def gettransaction(self):
1419 1421 # deprecated; talk to trmanager directly
1420 1422 return self.trmanager.transaction()
1421 1423
1422 1424 class transactionmanager(util.transactional):
1423 1425 """An object to manage the life cycle of a transaction
1424 1426
1425 1427 It creates the transaction on demand and calls the appropriate hooks when
1426 1428 closing the transaction."""
1427 1429 def __init__(self, repo, source, url):
1428 1430 self.repo = repo
1429 1431 self.source = source
1430 1432 self.url = url
1431 1433 self._tr = None
1432 1434
1433 1435 def transaction(self):
1434 1436 """Return an open transaction object, constructing if necessary"""
1435 1437 if not self._tr:
1436 1438 trname = '%s\n%s' % (self.source, util.hidepassword(self.url))
1437 1439 self._tr = self.repo.transaction(trname)
1438 1440 self._tr.hookargs['source'] = self.source
1439 1441 self._tr.hookargs['url'] = self.url
1440 1442 return self._tr
1441 1443
1442 1444 def close(self):
1443 1445 """close transaction if created"""
1444 1446 if self._tr is not None:
1445 1447 self._tr.close()
1446 1448
1447 1449 def release(self):
1448 1450 """release transaction if created"""
1449 1451 if self._tr is not None:
1450 1452 self._tr.release()
1451 1453
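# Editor's note (illustrative, not part of this change): transactionmanager
# is a util.transactional, so it is normally driven with a ``with`` block,
# exactly as pull() does below (the transaction is closed on success and
# released on failure).  A minimal sketch, with a made-up helper name:

def _example_transactionmanager_usage(repo, remote):
    """Open a pull-style transaction lazily and let the context manager
    close or release it (sketch)."""
    tm = transactionmanager(repo, 'pull', remote.url())
    with repo.lock(), tm:
        tr = tm.transaction()  # created on first use
        # ... apply incoming data under ``tr`` here ...
        return tr.hookargs['source']  # 'pull', set by transaction() above
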
1452 1454 def listkeys(remote, namespace):
1453 1455 with remote.commandexecutor() as e:
1454 1456 return e.callcommand('listkeys', {'namespace': namespace}).result()
1455 1457
1456 1458 def _fullpullbundle2(repo, pullop):
1457 1459 # The server may send a partial reply, i.e. when inlining
1458 1460 # pre-computed bundles. In that case, update the common
1459 1461 # set based on the results and pull another bundle.
1460 1462 #
1461 1463 # There are two indicators that the process is finished:
1462 1464 # - no changeset has been added, or
1463 1465 # - all remote heads are known locally.
1464 1466 # The head check must use the unfiltered view as obsoletion
1465 1467 # markers can hide heads.
1466 1468 unfi = repo.unfiltered()
1467 1469 unficl = unfi.changelog
1468 1470 def headsofdiff(h1, h2):
1469 1471 """Returns heads(h1 % h2)"""
1470 1472 res = unfi.set('heads(%ln %% %ln)', h1, h2)
1471 1473 return set(ctx.node() for ctx in res)
1472 1474 def headsofunion(h1, h2):
1473 1475 """Returns heads((h1 + h2) - null)"""
1474 1476 res = unfi.set('heads((%ln + %ln - null))', h1, h2)
1475 1477 return set(ctx.node() for ctx in res)
1476 1478 while True:
1477 1479 old_heads = unficl.heads()
1478 1480 clstart = len(unficl)
1479 1481 _pullbundle2(pullop)
1480 1482 if repository.NARROW_REQUIREMENT in repo.requirements:
1481 1483 # XXX narrow clones filter the heads on the server side during
1482 1484 # XXX getbundle and result in partial replies as well.
1483 1485 # XXX Disable pull bundles in this case as band aid to avoid
1484 1486 # XXX extra round trips.
1485 1487 break
1486 1488 if clstart == len(unficl):
1487 1489 break
1488 1490 if all(unficl.hasnode(n) for n in pullop.rheads):
1489 1491 break
1490 1492 new_heads = headsofdiff(unficl.heads(), old_heads)
1491 1493 pullop.common = headsofunion(new_heads, pullop.common)
1492 1494 pullop.rheads = set(pullop.rheads) - pullop.common
1493 1495
1494 1496 def pull(repo, remote, heads=None, force=False, bookmarks=(), opargs=None,
1495 1497 streamclonerequested=None, includepats=None, excludepats=None,
1496 1498 depth=None):
1497 1499 """Fetch repository data from a remote.
1498 1500
1499 1501 This is the main function used to retrieve data from a remote repository.
1500 1502
1501 1503 ``repo`` is the local repository to clone into.
1502 1504 ``remote`` is a peer instance.
1503 1505 ``heads`` is an iterable of revisions we want to pull. ``None`` (the
1504 1506 default) means to pull everything from the remote.
1505 1507 ``bookmarks`` is an iterable of bookmarks requested to be pulled. By
1506 1508 default, all remote bookmarks are pulled.
1507 1509 ``opargs`` are additional keyword arguments to pass to ``pulloperation``
1508 1510 initialization.
1509 1511 ``streamclonerequested`` is a boolean indicating whether a "streaming
1510 1512 clone" is requested. A "streaming clone" is essentially a raw file copy
1511 1513 of revlogs from the server. This only works when the local repository is
1512 1514 empty. The default value of ``None`` means to respect the server
1513 1515 configuration for preferring stream clones.
1514 1516 ``includepats`` and ``excludepats`` define explicit file patterns to
1515 1517 include and exclude in storage, respectively. If not defined, narrow
1516 1518 patterns from the repo instance are used, if available.
1517 1519 ``depth`` is an integer indicating the DAG depth of history we're
1518 1520 interested in. If defined, for each revision specified in ``heads``, we
1519 1521 will fetch up to this many of its ancestors and data associated with them.
1520 1522
1521 1523 Returns the ``pulloperation`` created for this pull.
1522 1524 """
1523 1525 if opargs is None:
1524 1526 opargs = {}
1525 1527
1526 1528 # We allow the narrow patterns to be passed in explicitly to provide more
1527 1529 # flexibility for API consumers.
1528 1530 if includepats or excludepats:
1529 1531 includepats = includepats or set()
1530 1532 excludepats = excludepats or set()
1531 1533 else:
1532 1534 includepats, excludepats = repo.narrowpats
1533 1535
1534 1536 narrowspec.validatepatterns(includepats)
1535 1537 narrowspec.validatepatterns(excludepats)
1536 1538
1537 1539 pullop = pulloperation(repo, remote, heads, force, bookmarks=bookmarks,
1538 1540 streamclonerequested=streamclonerequested,
1539 1541 includepats=includepats, excludepats=excludepats,
1540 1542 depth=depth,
1541 1543 **pycompat.strkwargs(opargs))
1542 1544
1543 1545 peerlocal = pullop.remote.local()
1544 1546 if peerlocal:
1545 1547 missing = set(peerlocal.requirements) - pullop.repo.supported
1546 1548 if missing:
1547 1549 msg = _("required features are not"
1548 1550 " supported in the destination:"
1549 1551 " %s") % (', '.join(sorted(missing)))
1550 1552 raise error.Abort(msg)
1551 1553
1552 1554 pullop.trmanager = transactionmanager(repo, 'pull', remote.url())
1553 1555 wlock = util.nullcontextmanager()
1554 1556 if not bookmod.bookmarksinstore(repo):
1555 1557 wlock = repo.wlock()
1556 1558 with wlock, repo.lock(), pullop.trmanager:
1557 1559 # Use the modern wire protocol, if available.
1558 1560 if remote.capable('command-changesetdata'):
1559 1561 exchangev2.pull(pullop)
1560 1562 else:
1561 1563 # This should ideally be in _pullbundle2(). However, it needs to run
1562 1564 # before discovery to avoid extra work.
1563 1565 _maybeapplyclonebundle(pullop)
1564 1566 streamclone.maybeperformlegacystreamclone(pullop)
1565 1567 _pulldiscovery(pullop)
1566 1568 if pullop.canusebundle2:
1567 1569 _fullpullbundle2(repo, pullop)
1568 1570 _pullchangeset(pullop)
1569 1571 _pullphase(pullop)
1570 1572 _pullbookmarks(pullop)
1571 1573 _pullobsolete(pullop)
1572 1574
1573 1575 # storing remotenames
1574 1576 if repo.ui.configbool('experimental', 'remotenames'):
1575 1577 logexchange.pullremotenames(repo, remote)
1576 1578
1577 1579 return pullop
1578 1580
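# Editor's sketch (illustrative only, not part of this change): the typical
# way a script or extension drives pull() above.  ``peer`` is assumed to come
# from hg.peer(); the bookmark name is made up.

def _example_pull_everything(repo, peer):
    """Pull all changesets plus one explicit bookmark from ``peer``."""
    pullop = pull(repo, peer, heads=None, bookmarks=['@'])
    # pullop.cgresult carries the changegroup application result and
    # pullop.stepsdone records which exchange steps actually ran.
    return pullop.cgresult
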
1579 1581 # list of steps to perform discovery before pull
1580 1582 pulldiscoveryorder = []
1581 1583
1582 1584 # Mapping between step name and function
1583 1585 #
1584 1586 # This exists to help extensions wrap steps if necessary
1585 1587 pulldiscoverymapping = {}
1586 1588
1587 1589 def pulldiscovery(stepname):
1588 1590 """decorator for function performing discovery before pull
1589 1591
1590 1592 The function is added to the step -> function mapping and appended to the
1591 1593 list of steps. Beware that decorated functions will be added in order (this
1592 1594 may matter).
1593 1595
1594 1596 You can only use this decorator for a new step; if you want to wrap a step
1595 1597 from an extension, change the pulldiscoverymapping dictionary directly."""
1596 1598 def dec(func):
1597 1599 assert stepname not in pulldiscoverymapping
1598 1600 pulldiscoverymapping[stepname] = func
1599 1601 pulldiscoveryorder.append(stepname)
1600 1602 return func
1601 1603 return dec
1602 1604
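# Editor's sketch (not part of this change): how an extension would register
# an extra discovery step with the decorator above.  The step name and the
# body are made up; wrapping an existing step is done by replacing its entry
# in pulldiscoverymapping instead.

@pulldiscovery('example:log-remote')
def _example_pulldiscoverylog(pullop):
    """Record which remote we are about to negotiate with (illustrative)."""
    pullop.repo.ui.debug('discovery will talk to %s\n'
                         % pullop.remote.url())

# Wrapping an existing step instead would look like:
#   orig = pulldiscoverymapping['changegroup']
#   pulldiscoverymapping['changegroup'] = lambda pullop: orig(pullop)
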
1603 1605 def _pulldiscovery(pullop):
1604 1606 """Run all discovery steps"""
1605 1607 for stepname in pulldiscoveryorder:
1606 1608 step = pulldiscoverymapping[stepname]
1607 1609 step(pullop)
1608 1610
1609 1611 @pulldiscovery('b1:bookmarks')
1610 1612 def _pullbookmarkbundle1(pullop):
1611 1613 """fetch bookmark data in bundle1 case
1612 1614
1613 1615 If not using bundle2, we have to fetch bookmarks before changeset
1614 1616 discovery to reduce the chance and impact of race conditions."""
1615 1617 if pullop.remotebookmarks is not None:
1616 1618 return
1617 1619 if pullop.canusebundle2 and 'listkeys' in pullop.remotebundle2caps:
1618 1620 # all known bundle2 servers now support listkeys, but let's be nice with
1619 1621 # new implementations.
1620 1622 return
1621 1623 books = listkeys(pullop.remote, 'bookmarks')
1622 1624 pullop.remotebookmarks = bookmod.unhexlifybookmarks(books)
1623 1625
1624 1626
1625 1627 @pulldiscovery('changegroup')
1626 1628 def _pulldiscoverychangegroup(pullop):
1627 1629 """discovery phase for the pull
1628 1630
1629 1631 Currently handles changeset discovery only; this will change to handle all
1630 1632 discovery at some point."""
1631 1633 tmp = discovery.findcommonincoming(pullop.repo,
1632 1634 pullop.remote,
1633 1635 heads=pullop.heads,
1634 1636 force=pullop.force)
1635 1637 common, fetch, rheads = tmp
1636 1638 nm = pullop.repo.unfiltered().changelog.nodemap
1637 1639 if fetch and rheads:
1638 1640 # If a remote head is filtered locally, put it back in common.
1639 1641 #
1640 1642 # This is a hackish solution to catch most of the "common but locally
1641 1643 # hidden" situations. We do not perform discovery on the unfiltered
1642 1644 # repository because it ends up doing a pathological number of round
1643 1645 # trips for a huge amount of changesets we do not care about.
1644 1646 #
1645 1647 # If a set of such "common but filtered" changesets exists on the server
1646 1648 # but does not include a remote head, we'll not be able to detect it.
1647 1649 scommon = set(common)
1648 1650 for n in rheads:
1649 1651 if n in nm:
1650 1652 if n not in scommon:
1651 1653 common.append(n)
1652 1654 if set(rheads).issubset(set(common)):
1653 1655 fetch = []
1654 1656 pullop.common = common
1655 1657 pullop.fetch = fetch
1656 1658 pullop.rheads = rheads
1657 1659
1658 1660 def _pullbundle2(pullop):
1659 1661 """pull data using bundle2
1660 1662
1661 1663 For now, the only supported data are changegroup."""
1662 1664 kwargs = {'bundlecaps': caps20to10(pullop.repo, role='client')}
1663 1665
1664 1666 # make ui easier to access
1665 1667 ui = pullop.repo.ui
1666 1668
1667 1669 # At the moment we don't do stream clones over bundle2. If that is
1668 1670 # implemented then here's where the check for that will go.
1669 1671 streaming = streamclone.canperformstreamclone(pullop, bundle2=True)[0]
1670 1672
1671 1673 # declare pull perimeters
1672 1674 kwargs['common'] = pullop.common
1673 1675 kwargs['heads'] = pullop.heads or pullop.rheads
1674 1676
1675 1677 # check that the server supports narrow, then add includepats and excludepats
1676 1678 servernarrow = pullop.remote.capable(wireprototypes.NARROWCAP)
1677 1679 if servernarrow and pullop.includepats:
1678 1680 kwargs['includepats'] = pullop.includepats
1679 1681 if servernarrow and pullop.excludepats:
1680 1682 kwargs['excludepats'] = pullop.excludepats
1681 1683
1682 1684 if streaming:
1683 1685 kwargs['cg'] = False
1684 1686 kwargs['stream'] = True
1685 1687 pullop.stepsdone.add('changegroup')
1686 1688 pullop.stepsdone.add('phases')
1687 1689
1688 1690 else:
1689 1691 # pulling changegroup
1690 1692 pullop.stepsdone.add('changegroup')
1691 1693
1692 1694 kwargs['cg'] = pullop.fetch
1693 1695
1694 1696 legacyphase = 'phases' in ui.configlist('devel', 'legacy.exchange')
1695 1697 hasbinaryphase = 'heads' in pullop.remotebundle2caps.get('phases', ())
1696 1698 if (not legacyphase and hasbinaryphase):
1697 1699 kwargs['phases'] = True
1698 1700 pullop.stepsdone.add('phases')
1699 1701
1700 1702 if 'listkeys' in pullop.remotebundle2caps:
1701 1703 if 'phases' not in pullop.stepsdone:
1702 1704 kwargs['listkeys'] = ['phases']
1703 1705
1704 1706 bookmarksrequested = False
1705 1707 legacybookmark = 'bookmarks' in ui.configlist('devel', 'legacy.exchange')
1706 1708 hasbinarybook = 'bookmarks' in pullop.remotebundle2caps
1707 1709
1708 1710 if pullop.remotebookmarks is not None:
1709 1711 pullop.stepsdone.add('request-bookmarks')
1710 1712
1711 1713 if ('request-bookmarks' not in pullop.stepsdone
1712 1714 and pullop.remotebookmarks is None
1713 1715 and not legacybookmark and hasbinarybook):
1714 1716 kwargs['bookmarks'] = True
1715 1717 bookmarksrequested = True
1716 1718
1717 1719 if 'listkeys' in pullop.remotebundle2caps:
1718 1720 if 'request-bookmarks' not in pullop.stepsdone:
1719 1721 # make sure to always include bookmark data when migrating
1720 1722 # `hg incoming --bundle` to using this function.
1721 1723 pullop.stepsdone.add('request-bookmarks')
1722 1724 kwargs.setdefault('listkeys', []).append('bookmarks')
1723 1725
1724 1726 # If this is a full pull / clone and the server supports the clone bundles
1725 1727 # feature, tell the server whether we attempted a clone bundle. The
1726 1728 # presence of this flag indicates the client supports clone bundles. This
1727 1729 # will enable the server to treat clients that support clone bundles
1728 1730 # differently from those that don't.
1729 1731 if (pullop.remote.capable('clonebundles')
1730 1732 and pullop.heads is None and list(pullop.common) == [nullid]):
1731 1733 kwargs['cbattempted'] = pullop.clonebundleattempted
1732 1734
1733 1735 if streaming:
1734 1736 pullop.repo.ui.status(_('streaming all changes\n'))
1735 1737 elif not pullop.fetch:
1736 1738 pullop.repo.ui.status(_("no changes found\n"))
1737 1739 pullop.cgresult = 0
1738 1740 else:
1739 1741 if pullop.heads is None and list(pullop.common) == [nullid]:
1740 1742 pullop.repo.ui.status(_("requesting all changes\n"))
1741 1743 if obsolete.isenabled(pullop.repo, obsolete.exchangeopt):
1742 1744 remoteversions = bundle2.obsmarkersversion(pullop.remotebundle2caps)
1743 1745 if obsolete.commonversion(remoteversions) is not None:
1744 1746 kwargs['obsmarkers'] = True
1745 1747 pullop.stepsdone.add('obsmarkers')
1746 1748 _pullbundle2extraprepare(pullop, kwargs)
1747 1749
1748 1750 with pullop.remote.commandexecutor() as e:
1749 1751 args = dict(kwargs)
1750 1752 args['source'] = 'pull'
1751 1753 bundle = e.callcommand('getbundle', args).result()
1752 1754
1753 1755 try:
1754 1756 op = bundle2.bundleoperation(pullop.repo, pullop.gettransaction,
1755 1757 source='pull')
1756 1758 op.modes['bookmarks'] = 'records'
1757 1759 bundle2.processbundle(pullop.repo, bundle, op=op)
1758 1760 except bundle2.AbortFromPart as exc:
1759 1761 pullop.repo.ui.status(_('remote: abort: %s\n') % exc)
1760 1762 raise error.Abort(_('pull failed on remote'), hint=exc.hint)
1761 1763 except error.BundleValueError as exc:
1762 1764 raise error.Abort(_('missing support for %s') % exc)
1763 1765
1764 1766 if pullop.fetch:
1765 1767 pullop.cgresult = bundle2.combinechangegroupresults(op)
1766 1768
1767 1769 # processing phases change
1768 1770 for namespace, value in op.records['listkeys']:
1769 1771 if namespace == 'phases':
1770 1772 _pullapplyphases(pullop, value)
1771 1773
1772 1774 # processing bookmark update
1773 1775 if bookmarksrequested:
1774 1776 books = {}
1775 1777 for record in op.records['bookmarks']:
1776 1778 books[record['bookmark']] = record["node"]
1777 1779 pullop.remotebookmarks = books
1778 1780 else:
1779 1781 for namespace, value in op.records['listkeys']:
1780 1782 if namespace == 'bookmarks':
1781 1783 pullop.remotebookmarks = bookmod.unhexlifybookmarks(value)
1782 1784
1783 1785 # bookmark data were either already there or pulled in the bundle
1784 1786 if pullop.remotebookmarks is not None:
1785 1787 _pullbookmarks(pullop)
1786 1788
1787 1789 def _pullbundle2extraprepare(pullop, kwargs):
1788 1790 """hook function so that extensions can extend the getbundle call"""
1789 1791
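# Editor's sketch (illustrative, not part of this change): extensions usually
# extend the getbundle arguments by wrapping the hook above.  The extra
# argument name is made up and would only work if the server knows about it.
#
#   from mercurial import exchange, extensions
#
#   def _extraprepare(orig, pullop, kwargs):
#       kwargs['example-flag'] = True  # illustrative extra getbundle argument
#       return orig(pullop, kwargs)
#
#   def extsetup(ui):
#       extensions.wrapfunction(exchange, '_pullbundle2extraprepare',
#                               _extraprepare)
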
1790 1792 def _pullchangeset(pullop):
1791 1793 """pull changeset from unbundle into the local repo"""
1792 1794 # We delay the open of the transaction as late as possible so we
1793 1795 # don't open transaction for nothing or you break future useful
1794 1796 # rollback call
1795 1797 if 'changegroup' in pullop.stepsdone:
1796 1798 return
1797 1799 pullop.stepsdone.add('changegroup')
1798 1800 if not pullop.fetch:
1799 1801 pullop.repo.ui.status(_("no changes found\n"))
1800 1802 pullop.cgresult = 0
1801 1803 return
1802 1804 tr = pullop.gettransaction()
1803 1805 if pullop.heads is None and list(pullop.common) == [nullid]:
1804 1806 pullop.repo.ui.status(_("requesting all changes\n"))
1805 1807 elif pullop.heads is None and pullop.remote.capable('changegroupsubset'):
1806 1808 # issue1320, avoid a race if remote changed after discovery
1807 1809 pullop.heads = pullop.rheads
1808 1810
1809 1811 if pullop.remote.capable('getbundle'):
1810 1812 # TODO: get bundlecaps from remote
1811 1813 cg = pullop.remote.getbundle('pull', common=pullop.common,
1812 1814 heads=pullop.heads or pullop.rheads)
1813 1815 elif pullop.heads is None:
1814 1816 with pullop.remote.commandexecutor() as e:
1815 1817 cg = e.callcommand('changegroup', {
1816 1818 'nodes': pullop.fetch,
1817 1819 'source': 'pull',
1818 1820 }).result()
1819 1821
1820 1822 elif not pullop.remote.capable('changegroupsubset'):
1821 1823 raise error.Abort(_("partial pull cannot be done because "
1822 1824 "other repository doesn't support "
1823 1825 "changegroupsubset."))
1824 1826 else:
1825 1827 with pullop.remote.commandexecutor() as e:
1826 1828 cg = e.callcommand('changegroupsubset', {
1827 1829 'bases': pullop.fetch,
1828 1830 'heads': pullop.heads,
1829 1831 'source': 'pull',
1830 1832 }).result()
1831 1833
1832 1834 bundleop = bundle2.applybundle(pullop.repo, cg, tr, 'pull',
1833 1835 pullop.remote.url())
1834 1836 pullop.cgresult = bundle2.combinechangegroupresults(bundleop)
1835 1837
1836 1838 def _pullphase(pullop):
1837 1839 # Get remote phases data from remote
1838 1840 if 'phases' in pullop.stepsdone:
1839 1841 return
1840 1842 remotephases = listkeys(pullop.remote, 'phases')
1841 1843 _pullapplyphases(pullop, remotephases)
1842 1844
1843 1845 def _pullapplyphases(pullop, remotephases):
1844 1846 """apply phase movement from observed remote state"""
1845 1847 if 'phases' in pullop.stepsdone:
1846 1848 return
1847 1849 pullop.stepsdone.add('phases')
1848 1850 publishing = bool(remotephases.get('publishing', False))
1849 1851 if remotephases and not publishing:
1850 1852 # remote is new and non-publishing
1851 1853 pheads, _dr = phases.analyzeremotephases(pullop.repo,
1852 1854 pullop.pulledsubset,
1853 1855 remotephases)
1854 1856 dheads = pullop.pulledsubset
1855 1857 else:
1856 1858 # Remote is old or publishing all common changesets
1857 1859 # should be seen as public
1858 1860 pheads = pullop.pulledsubset
1859 1861 dheads = []
1860 1862 unfi = pullop.repo.unfiltered()
1861 1863 phase = unfi._phasecache.phase
1862 1864 rev = unfi.changelog.nodemap.get
1863 1865 public = phases.public
1864 1866 draft = phases.draft
1865 1867
1866 1868 # exclude changesets already public locally and update the others
1867 1869 pheads = [pn for pn in pheads if phase(unfi, rev(pn)) > public]
1868 1870 if pheads:
1869 1871 tr = pullop.gettransaction()
1870 1872 phases.advanceboundary(pullop.repo, tr, public, pheads)
1871 1873
1872 1874 # exclude changesets already draft locally and update the others
1873 1875 dheads = [pn for pn in dheads if phase(unfi, rev(pn)) > draft]
1874 1876 if dheads:
1875 1877 tr = pullop.gettransaction()
1876 1878 phases.advanceboundary(pullop.repo, tr, draft, dheads)
1877 1879
1878 1880 def _pullbookmarks(pullop):
1879 1881 """process the remote bookmark information to update the local one"""
1880 1882 if 'bookmarks' in pullop.stepsdone:
1881 1883 return
1882 1884 pullop.stepsdone.add('bookmarks')
1883 1885 repo = pullop.repo
1884 1886 remotebookmarks = pullop.remotebookmarks
1885 1887 bookmod.updatefromremote(repo.ui, repo, remotebookmarks,
1886 1888 pullop.remote.url(),
1887 1889 pullop.gettransaction,
1888 1890 explicit=pullop.explicitbookmarks)
1889 1891
1890 1892 def _pullobsolete(pullop):
1891 1893 """utility function to pull obsolete markers from a remote
1892 1894
1893 1895 The `gettransaction` is a function that returns the pull transaction, creating
1894 1896 one if necessary. We return the transaction to inform the calling code that
1895 1897 a new transaction has been created (when applicable).
1896 1898
1897 1899 Exists mostly to allow overriding for experimentation purposes"""
1898 1900 if 'obsmarkers' in pullop.stepsdone:
1899 1901 return
1900 1902 pullop.stepsdone.add('obsmarkers')
1901 1903 tr = None
1902 1904 if obsolete.isenabled(pullop.repo, obsolete.exchangeopt):
1903 1905 pullop.repo.ui.debug('fetching remote obsolete markers\n')
1904 1906 remoteobs = listkeys(pullop.remote, 'obsolete')
1905 1907 if 'dump0' in remoteobs:
1906 1908 tr = pullop.gettransaction()
1907 1909 markers = []
1908 1910 for key in sorted(remoteobs, reverse=True):
1909 1911 if key.startswith('dump'):
1910 1912 data = util.b85decode(remoteobs[key])
1911 1913 version, newmarks = obsolete._readmarkers(data)
1912 1914 markers += newmarks
1913 1915 if markers:
1914 1916 pullop.repo.obsstore.add(tr, markers)
1915 1917 pullop.repo.invalidatevolatilesets()
1916 1918 return tr
1917 1919
1918 1920 def applynarrowacl(repo, kwargs):
1919 1921 """Apply narrow fetch access control.
1920 1922
1921 1923 This massages the named arguments for getbundle wire protocol commands
1922 1924 so requested data is filtered through access control rules.
1923 1925 """
1924 1926 ui = repo.ui
1925 1927 # TODO this assumes existence of HTTP and is a layering violation.
1926 1928 username = ui.shortuser(ui.environ.get('REMOTE_USER') or ui.username())
1927 1929 user_includes = ui.configlist(
1928 1930 _NARROWACL_SECTION, username + '.includes',
1929 1931 ui.configlist(_NARROWACL_SECTION, 'default.includes'))
1930 1932 user_excludes = ui.configlist(
1931 1933 _NARROWACL_SECTION, username + '.excludes',
1932 1934 ui.configlist(_NARROWACL_SECTION, 'default.excludes'))
1933 1935 if not user_includes:
1934 1936 raise error.Abort(_("{} configuration for user {} is empty")
1935 1937 .format(_NARROWACL_SECTION, username))
1936 1938
1937 1939 user_includes = [
1938 1940 'path:.' if p == '*' else 'path:' + p for p in user_includes]
1939 1941 user_excludes = [
1940 1942 'path:.' if p == '*' else 'path:' + p for p in user_excludes]
1941 1943
1942 1944 req_includes = set(kwargs.get(r'includepats', []))
1943 1945 req_excludes = set(kwargs.get(r'excludepats', []))
1944 1946
1945 1947 req_includes, req_excludes, invalid_includes = narrowspec.restrictpatterns(
1946 1948 req_includes, req_excludes, user_includes, user_excludes)
1947 1949
1948 1950 if invalid_includes:
1949 1951 raise error.Abort(
1950 1952 _("The following includes are not accessible for {}: {}")
1951 1953 .format(username, invalid_includes))
1952 1954
1953 1955 new_args = {}
1954 1956 new_args.update(kwargs)
1955 1957 new_args[r'narrow'] = True
1956 1958 new_args[r'narrow_acl'] = True
1957 1959 new_args[r'includepats'] = req_includes
1958 1960 if req_excludes:
1959 1961 new_args[r'excludepats'] = req_excludes
1960 1962
1961 1963 return new_args
1962 1964
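# Editor's note (illustrative, not part of this change): the configuration
# consulted above lives in the section named by _NARROWACL_SECTION, with keys
# following a '<user>.includes' / '<user>.excludes' scheme and 'default.*'
# fallbacks, e.g.
#
#   [<narrow ACL section>]
#   default.includes = common/
#   alice.includes = team/alice/*
#   alice.excludes = team/alice/private
#
# A bare '*' is widened to 'path:.'; every other value becomes a 'path:'
# pattern before narrowspec.restrictpatterns() is applied.
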
1963 1965 def _computeellipsis(repo, common, heads, known, match, depth=None):
1964 1966 """Compute the shape of a narrowed DAG.
1965 1967
1966 1968 Args:
1967 1969 repo: The repository we're transferring.
1968 1970 common: The roots of the DAG range we're transferring.
1969 1971 May be just [nullid], which means all ancestors of heads.
1970 1972 heads: The heads of the DAG range we're transferring.
1971 1973 match: The narrowmatcher that allows us to identify relevant changes.
1972 1974 depth: If not None, only consider nodes to be full nodes if they are at
1973 1975 most depth changesets away from one of heads.
1974 1976
1975 1977 Returns:
1976 1978 A tuple of (visitnodes, relevant_nodes, ellipsisroots) where:
1977 1979
1978 1980 visitnodes: The list of nodes (either full or ellipsis) which
1979 1981 need to be sent to the client.
1980 1982 relevant_nodes: The set of changelog nodes which change a file inside
1981 1983 the narrowspec. The client needs these as non-ellipsis nodes.
1982 1984 ellipsisroots: A dict of {rev: parents} that is used in
1983 1985 narrowchangegroup to produce ellipsis nodes with the
1984 1986 correct parents.
1985 1987 """
1986 1988 cl = repo.changelog
1987 1989 mfl = repo.manifestlog
1988 1990
1989 1991 clrev = cl.rev
1990 1992
1991 1993 commonrevs = {clrev(n) for n in common} | {nullrev}
1992 1994 headsrevs = {clrev(n) for n in heads}
1993 1995
1994 1996 if depth:
1995 1997 revdepth = {h: 0 for h in headsrevs}
1996 1998
1997 1999 ellipsisheads = collections.defaultdict(set)
1998 2000 ellipsisroots = collections.defaultdict(set)
1999 2001
2000 2002 def addroot(head, curchange):
2001 2003 """Add a root to an ellipsis head, splitting heads with 3 roots."""
2002 2004 ellipsisroots[head].add(curchange)
2003 2005 # Recursively split ellipsis heads with 3 roots by finding the
2004 2006 # roots' youngest common descendant which is an elided merge commit.
2005 2007 # That descendant takes 2 of the 3 roots as its own, and becomes a
2006 2008 # root of the head.
2007 2009 while len(ellipsisroots[head]) > 2:
2008 2010 child, roots = splithead(head)
2009 2011 splitroots(head, child, roots)
2010 2012 head = child # Recurse in case we just added a 3rd root
2011 2013
2012 2014 def splitroots(head, child, roots):
2013 2015 ellipsisroots[head].difference_update(roots)
2014 2016 ellipsisroots[head].add(child)
2015 2017 ellipsisroots[child].update(roots)
2016 2018 ellipsisroots[child].discard(child)
2017 2019
2018 2020 def splithead(head):
2019 2021 r1, r2, r3 = sorted(ellipsisroots[head])
2020 2022 for nr1, nr2 in ((r2, r3), (r1, r3), (r1, r2)):
2021 2023 mid = repo.revs('sort(merge() & %d::%d & %d::%d, -rev)',
2022 2024 nr1, head, nr2, head)
2023 2025 for j in mid:
2024 2026 if j == nr2:
2025 2027 return nr2, (nr1, nr2)
2026 2028 if j not in ellipsisroots or len(ellipsisroots[j]) < 2:
2027 2029 return j, (nr1, nr2)
2028 2030 raise error.Abort(_('Failed to split up ellipsis node! head: %d, '
2029 2031 'roots: %d %d %d') % (head, r1, r2, r3))
2030 2032
2031 2033 missing = list(cl.findmissingrevs(common=commonrevs, heads=headsrevs))
2032 2034 visit = reversed(missing)
2033 2035 relevant_nodes = set()
2034 2036 visitnodes = [cl.node(m) for m in missing]
2035 2037 required = set(headsrevs) | known
2036 2038 for rev in visit:
2037 2039 clrev = cl.changelogrevision(rev)
2038 2040 ps = [prev for prev in cl.parentrevs(rev) if prev != nullrev]
2039 2041 if depth is not None:
2040 2042 curdepth = revdepth[rev]
2041 2043 for p in ps:
2042 2044 revdepth[p] = min(curdepth + 1, revdepth.get(p, depth + 1))
2043 2045 needed = False
2044 2046 shallow_enough = depth is None or revdepth[rev] <= depth
2045 2047 if shallow_enough:
2046 2048 curmf = mfl[clrev.manifest].read()
2047 2049 if ps:
2048 2050 # We choose to not trust the changed files list in
2049 2051 # changesets because it's not always correct. TODO: could
2050 2052 # we trust it for the non-merge case?
2051 2053 p1mf = mfl[cl.changelogrevision(ps[0]).manifest].read()
2052 2054 needed = bool(curmf.diff(p1mf, match))
2053 2055 if not needed and len(ps) > 1:
2054 2056 # For merge changes, the list of changed files is not
2055 2057 # helpful, since we need to emit the merge if a file
2056 2058 # in the narrow spec has changed on either side of the
2057 2059 # merge. As a result, we do a manifest diff to check.
2058 2060 p2mf = mfl[cl.changelogrevision(ps[1]).manifest].read()
2059 2061 needed = bool(curmf.diff(p2mf, match))
2060 2062 else:
2061 2063 # For a root node, we need to include the node if any
2062 2064 # files in the node match the narrowspec.
2063 2065 needed = any(curmf.walk(match))
2064 2066
2065 2067 if needed:
2066 2068 for head in ellipsisheads[rev]:
2067 2069 addroot(head, rev)
2068 2070 for p in ps:
2069 2071 required.add(p)
2070 2072 relevant_nodes.add(cl.node(rev))
2071 2073 else:
2072 2074 if not ps:
2073 2075 ps = [nullrev]
2074 2076 if rev in required:
2075 2077 for head in ellipsisheads[rev]:
2076 2078 addroot(head, rev)
2077 2079 for p in ps:
2078 2080 ellipsisheads[p].add(rev)
2079 2081 else:
2080 2082 for p in ps:
2081 2083 ellipsisheads[p] |= ellipsisheads[rev]
2082 2084
2083 2085 # add common changesets as roots of their reachable ellipsis heads
2084 2086 for c in commonrevs:
2085 2087 for head in ellipsisheads[c]:
2086 2088 addroot(head, c)
2087 2089 return visitnodes, relevant_nodes, ellipsisroots
2088 2090
2089 2091 def caps20to10(repo, role):
2090 2092 """return a set with appropriate options to use bundle20 during getbundle"""
2091 2093 caps = {'HG20'}
2092 2094 capsblob = bundle2.encodecaps(bundle2.getrepocaps(repo, role=role))
2093 2095 caps.add('bundle2=' + urlreq.quote(capsblob))
2094 2096 return caps
2095 2097
2096 2098 # List of names of steps to perform for a bundle2 for getbundle, order matters.
2097 2099 getbundle2partsorder = []
2098 2100
2099 2101 # Mapping between step name and function
2100 2102 #
2101 2103 # This exists to help extensions wrap steps if necessary
2102 2104 getbundle2partsmapping = {}
2103 2105
2104 2106 def getbundle2partsgenerator(stepname, idx=None):
2105 2107 """decorator for function generating bundle2 part for getbundle
2106 2108
2107 2109 The function is added to the step -> function mapping and appended to the
2108 2110 list of steps. Beware that decorated functions will be added in order
2109 2111 (this may matter).
2110 2112
2111 2113 You can only use this decorator for new steps; if you want to wrap a step
2112 2114 from an extension, modify the getbundle2partsmapping dictionary directly."""
2113 2115 def dec(func):
2114 2116 assert stepname not in getbundle2partsmapping
2115 2117 getbundle2partsmapping[stepname] = func
2116 2118 if idx is None:
2117 2119 getbundle2partsorder.append(stepname)
2118 2120 else:
2119 2121 getbundle2partsorder.insert(idx, stepname)
2120 2122 return func
2121 2123 return dec
2122 2124
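# Editor's sketch (illustrative, not part of this change): an extension can
# register its own part generator with the decorator above.  The part name
# and payload are made up, and the part is marked advisory so clients that
# do not recognize it will simply skip it.

@getbundle2partsgenerator('example:greeting')
def _example_getbundlegreetingpart(bundler, repo, source, bundlecaps=None,
                                   b2caps=None, **kwargs):
    """Attach a tiny advisory part to every generated bundle2 (sketch)."""
    bundler.newpart('example:greeting', data='hello from the server',
                    mandatory=False)
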
2123 2125 def bundle2requested(bundlecaps):
2124 2126 if bundlecaps is not None:
2125 2127 return any(cap.startswith('HG2') for cap in bundlecaps)
2126 2128 return False
2127 2129
2128 2130 def getbundlechunks(repo, source, heads=None, common=None, bundlecaps=None,
2129 2131 **kwargs):
2130 2132 """Return chunks constituting a bundle's raw data.
2131 2133
2132 2134 Could be a bundle HG10 or a bundle HG20 depending on bundlecaps
2133 2135 passed.
2134 2136
2135 2137 Returns a 2-tuple of a dict with metadata about the generated bundle
2136 2138 and an iterator over raw chunks (of varying sizes).
2137 2139 """
2138 2140 kwargs = pycompat.byteskwargs(kwargs)
2139 2141 info = {}
2140 2142 usebundle2 = bundle2requested(bundlecaps)
2141 2143 # bundle10 case
2142 2144 if not usebundle2:
2143 2145 if bundlecaps and not kwargs.get('cg', True):
2144 2146 raise ValueError(_('request for bundle10 must include changegroup'))
2145 2147
2146 2148 if kwargs:
2147 2149 raise ValueError(_('unsupported getbundle arguments: %s')
2148 2150 % ', '.join(sorted(kwargs.keys())))
2149 2151 outgoing = _computeoutgoing(repo, heads, common)
2150 2152 info['bundleversion'] = 1
2151 2153 return info, changegroup.makestream(repo, outgoing, '01', source,
2152 2154 bundlecaps=bundlecaps)
2153 2155
2154 2156 # bundle20 case
2155 2157 info['bundleversion'] = 2
2156 2158 b2caps = {}
2157 2159 for bcaps in bundlecaps:
2158 2160 if bcaps.startswith('bundle2='):
2159 2161 blob = urlreq.unquote(bcaps[len('bundle2='):])
2160 2162 b2caps.update(bundle2.decodecaps(blob))
2161 2163 bundler = bundle2.bundle20(repo.ui, b2caps)
2162 2164
2163 2165 kwargs['heads'] = heads
2164 2166 kwargs['common'] = common
2165 2167
2166 2168 for name in getbundle2partsorder:
2167 2169 func = getbundle2partsmapping[name]
2168 2170 func(bundler, repo, source, bundlecaps=bundlecaps, b2caps=b2caps,
2169 2171 **pycompat.strkwargs(kwargs))
2170 2172
2171 2173 info['prefercompressed'] = bundler.prefercompressed
2172 2174
2173 2175 return info, bundler.getchunks()
2174 2176
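# Editor's sketch (not part of this change): a caller that materializes the
# stream produced by getbundlechunks() above.  The 'HG20' capability mirrors
# what caps20to10() advertises; the file name and helper name are made up.

def _example_writebundle(repo, heads, common):
    """Write a bundle2 for the given heads/common range to 'example.hg'."""
    info, chunks = getbundlechunks(repo, 'bundle', heads=heads, common=common,
                                   bundlecaps={'HG20'})
    assert info['bundleversion'] == 2
    with open('example.hg', 'wb') as fh:
        for chunk in chunks:
            fh.write(chunk)
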
2175 2177 @getbundle2partsgenerator('stream2')
2176 2178 def _getbundlestream2(bundler, repo, *args, **kwargs):
2177 2179 return bundle2.addpartbundlestream2(bundler, repo, **kwargs)
2178 2180
2179 2181 @getbundle2partsgenerator('changegroup')
2180 2182 def _getbundlechangegrouppart(bundler, repo, source, bundlecaps=None,
2181 2183 b2caps=None, heads=None, common=None, **kwargs):
2182 2184 """add a changegroup part to the requested bundle"""
2183 2185 if not kwargs.get(r'cg', True):
2184 2186 return
2185 2187
2186 2188 version = '01'
2187 2189 cgversions = b2caps.get('changegroup')
2188 2190 if cgversions: # 3.1 and 3.2 ship with an empty value
2189 2191 cgversions = [v for v in cgversions
2190 2192 if v in changegroup.supportedoutgoingversions(repo)]
2191 2193 if not cgversions:
2192 2194 raise error.Abort(_('no common changegroup version'))
2193 2195 version = max(cgversions)
2194 2196
2195 2197 outgoing = _computeoutgoing(repo, heads, common)
2196 2198 if not outgoing.missing:
2197 2199 return
2198 2200
2199 2201 if kwargs.get(r'narrow', False):
2200 2202 include = sorted(filter(bool, kwargs.get(r'includepats', [])))
2201 2203 exclude = sorted(filter(bool, kwargs.get(r'excludepats', [])))
2202 2204 matcher = narrowspec.match(repo.root, include=include, exclude=exclude)
2203 2205 else:
2204 2206 matcher = None
2205 2207
2206 2208 cgstream = changegroup.makestream(repo, outgoing, version, source,
2207 2209 bundlecaps=bundlecaps, matcher=matcher)
2208 2210
2209 2211 part = bundler.newpart('changegroup', data=cgstream)
2210 2212 if cgversions:
2211 2213 part.addparam('version', version)
2212 2214
2213 2215 part.addparam('nbchanges', '%d' % len(outgoing.missing),
2214 2216 mandatory=False)
2215 2217
2216 2218 if 'treemanifest' in repo.requirements:
2217 2219 part.addparam('treemanifest', '1')
2218 2220
2219 2221 if (kwargs.get(r'narrow', False) and kwargs.get(r'narrow_acl', False)
2220 2222 and (include or exclude)):
2221 2223 # this is mandatory because otherwise ACL clients won't work
2222 2224 narrowspecpart = bundler.newpart('Narrow:responsespec')
2223 2225 narrowspecpart.data = '%s\0%s' % ('\n'.join(include),
2224 2226 '\n'.join(exclude))
2225 2227
2226 2228 @getbundle2partsgenerator('bookmarks')
2227 2229 def _getbundlebookmarkpart(bundler, repo, source, bundlecaps=None,
2228 2230 b2caps=None, **kwargs):
2229 2231 """add a bookmark part to the requested bundle"""
2230 2232 if not kwargs.get(r'bookmarks', False):
2231 2233 return
2232 2234 if 'bookmarks' not in b2caps:
2233 2235 raise error.Abort(_('no common bookmarks exchange method'))
2234 2236 books = bookmod.listbinbookmarks(repo)
2235 2237 data = bookmod.binaryencode(books)
2236 2238 if data:
2237 2239 bundler.newpart('bookmarks', data=data)
2238 2240
2239 2241 @getbundle2partsgenerator('listkeys')
2240 2242 def _getbundlelistkeysparts(bundler, repo, source, bundlecaps=None,
2241 2243 b2caps=None, **kwargs):
2242 2244 """add parts containing listkeys namespaces to the requested bundle"""
2243 2245 listkeys = kwargs.get(r'listkeys', ())
2244 2246 for namespace in listkeys:
2245 2247 part = bundler.newpart('listkeys')
2246 2248 part.addparam('namespace', namespace)
2247 2249 keys = repo.listkeys(namespace).items()
2248 2250 part.data = pushkey.encodekeys(keys)
2249 2251
2250 2252 @getbundle2partsgenerator('obsmarkers')
2251 2253 def _getbundleobsmarkerpart(bundler, repo, source, bundlecaps=None,
2252 2254 b2caps=None, heads=None, **kwargs):
2253 2255 """add an obsolescence markers part to the requested bundle"""
2254 2256 if kwargs.get(r'obsmarkers', False):
2255 2257 if heads is None:
2256 2258 heads = repo.heads()
2257 2259 subset = [c.node() for c in repo.set('::%ln', heads)]
2258 2260 markers = repo.obsstore.relevantmarkers(subset)
2259 2261 markers = sorted(markers)
2260 2262 bundle2.buildobsmarkerspart(bundler, markers)
2261 2263
2262 2264 @getbundle2partsgenerator('phases')
2263 2265 def _getbundlephasespart(bundler, repo, source, bundlecaps=None,
2264 2266 b2caps=None, heads=None, **kwargs):
2265 2267 """add phase heads part to the requested bundle"""
2266 2268 if kwargs.get(r'phases', False):
2267 2269 if 'heads' not in b2caps.get('phases'):
2268 2270 raise error.Abort(_('no common phases exchange method'))
2269 2271 if heads is None:
2270 2272 heads = repo.heads()
2271 2273
2272 2274 headsbyphase = collections.defaultdict(set)
2273 2275 if repo.publishing():
2274 2276 headsbyphase[phases.public] = heads
2275 2277 else:
2276 2278 # find the appropriate heads to move
2277 2279
2278 2280 phase = repo._phasecache.phase
2279 2281 node = repo.changelog.node
2280 2282 rev = repo.changelog.rev
2281 2283 for h in heads:
2282 2284 headsbyphase[phase(repo, rev(h))].add(h)
2283 2285 seenphases = list(headsbyphase.keys())
2284 2286
2285 2287 # We do not handle anything but public and draft phases for now
2286 2288 if seenphases:
2287 2289 assert max(seenphases) <= phases.draft
2288 2290
2289 2291 # if client is pulling non-public changesets, we need to find
2290 2292 # intermediate public heads.
2291 2293 draftheads = headsbyphase.get(phases.draft, set())
2292 2294 if draftheads:
2293 2295 publicheads = headsbyphase.get(phases.public, set())
2294 2296
2295 2297 revset = 'heads(only(%ln, %ln) and public())'
2296 2298 extraheads = repo.revs(revset, draftheads, publicheads)
2297 2299 for r in extraheads:
2298 2300 headsbyphase[phases.public].add(node(r))
2299 2301
2300 2302 # transform data in a format used by the encoding function
2301 2303 phasemapping = []
2302 2304 for phase in phases.allphases:
2303 2305 phasemapping.append(sorted(headsbyphase[phase]))
2304 2306
2305 2307 # generate the actual part
2306 2308 phasedata = phases.binaryencode(phasemapping)
2307 2309 bundler.newpart('phase-heads', data=phasedata)
2308 2310
2309 2311 @getbundle2partsgenerator('hgtagsfnodes')
2310 2312 def _getbundletagsfnodes(bundler, repo, source, bundlecaps=None,
2311 2313 b2caps=None, heads=None, common=None,
2312 2314 **kwargs):
2313 2315 """Transfer the .hgtags filenodes mapping.
2314 2316
2315 2317 Only values for heads in this bundle will be transferred.
2316 2318
2317 2319 The part data consists of pairs of 20 byte changeset node and .hgtags
2318 2320 filenodes raw values.
2319 2321 """
2320 2322 # Don't send unless:
2321 2323 # - changesets are being exchanged,
2322 2324 # - the client supports it.
2323 2325 if not (kwargs.get(r'cg', True) and 'hgtagsfnodes' in b2caps):
2324 2326 return
2325 2327
2326 2328 outgoing = _computeoutgoing(repo, heads, common)
2327 2329 bundle2.addparttagsfnodescache(repo, bundler, outgoing)
2328 2330
2329 2331 @getbundle2partsgenerator('cache:rev-branch-cache')
2330 2332 def _getbundlerevbranchcache(bundler, repo, source, bundlecaps=None,
2331 2333 b2caps=None, heads=None, common=None,
2332 2334 **kwargs):
2333 2335 """Transfer the rev-branch-cache mapping
2334 2336
2335 2337 The payload is a series of data related to each branch
2336 2338
2337 2339 1) branch name length
2338 2340 2) number of open heads
2339 2341 3) number of closed heads
2340 2342 4) open heads nodes
2341 2343 5) closed heads nodes
2342 2344 """
2343 2345 # Don't send unless:
2344 2346 # - changesets are being exchanged,
2345 2347 # - the client supports it,
2346 2348 # - narrow bundle isn't in play (not currently compatible).
2347 2349 if (not kwargs.get(r'cg', True)
2348 2350 or 'rev-branch-cache' not in b2caps
2349 2351 or kwargs.get(r'narrow', False)
2350 2352 or repo.ui.has_section(_NARROWACL_SECTION)):
2351 2353 return
2352 2354
2353 2355 outgoing = _computeoutgoing(repo, heads, common)
2354 2356 bundle2.addpartrevbranchcache(repo, bundler, outgoing)
2355 2357
2356 2358 def check_heads(repo, their_heads, context):
2357 2359 """check if the heads of a repo have been modified
2358 2360
2359 2361 Used by peer for unbundling.
2360 2362 """
2361 2363 heads = repo.heads()
2362 2364 heads_hash = hashlib.sha1(''.join(sorted(heads))).digest()
2363 2365 if not (their_heads == ['force'] or their_heads == heads or
2364 2366 their_heads == ['hashed', heads_hash]):
2365 2367 # someone else committed/pushed/unbundled while we
2366 2368 # were transferring data
2367 2369 raise error.PushRaced('repository changed while %s - '
2368 2370 'please try again' % context)
2369 2371
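# Editor's sketch (not part of this change): a bundle1 client that does not
# want to ship its full head list can send the 'hashed' form checked above.
# Helper name is illustrative.

def _example_hashedheads(repo):
    """Return the ['hashed', digest] value accepted by check_heads()."""
    heads = repo.heads()
    return ['hashed', hashlib.sha1(''.join(sorted(heads))).digest()]
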
2370 2372 def unbundle(repo, cg, heads, source, url):
2371 2373 """Apply a bundle to a repo.
2372 2374
2373 2375 This function makes sure the repo is locked during the application and has
2374 2376 a mechanism to check that no push race occurred between the creation of the
2375 2377 bundle and its application.
2376 2378
2377 2379 If the push was raced, a PushRaced exception is raised."""
2378 2380 r = 0
2379 2381 # need a transaction when processing a bundle2 stream
2380 2382 # [wlock, lock, tr] - needs to be an array so nested functions can modify it
2381 2383 lockandtr = [None, None, None]
2382 2384 recordout = None
2383 2385 # quick fix for output mismatch with bundle2 in 3.4
2384 2386 captureoutput = repo.ui.configbool('experimental', 'bundle2-output-capture')
2385 2387 if url.startswith('remote:http:') or url.startswith('remote:https:'):
2386 2388 captureoutput = True
2387 2389 try:
2388 2390 # note: outside bundle1, 'heads' is expected to be empty and this
2389 2391 # 'check_heads' call will be a no-op
2390 2392 check_heads(repo, heads, 'uploading changes')
2391 2393 # push can proceed
2392 2394 if not isinstance(cg, bundle2.unbundle20):
2393 2395 # legacy case: bundle1 (changegroup 01)
2394 2396 txnname = "\n".join([source, util.hidepassword(url)])
2395 2397 with repo.lock(), repo.transaction(txnname) as tr:
2396 2398 op = bundle2.applybundle(repo, cg, tr, source, url)
2397 2399 r = bundle2.combinechangegroupresults(op)
2398 2400 else:
2399 2401 r = None
2400 2402 try:
2401 2403 def gettransaction():
2402 2404 if not lockandtr[2]:
2403 2405 if not bookmod.bookmarksinstore(repo):
2404 2406 lockandtr[0] = repo.wlock()
2405 2407 lockandtr[1] = repo.lock()
2406 2408 lockandtr[2] = repo.transaction(source)
2407 2409 lockandtr[2].hookargs['source'] = source
2408 2410 lockandtr[2].hookargs['url'] = url
2409 2411 lockandtr[2].hookargs['bundle2'] = '1'
2410 2412 return lockandtr[2]
2411 2413
2412 2414 # Do greedy locking by default until we're satisfied with lazy
2413 2415 # locking.
2414 2416 if not repo.ui.configbool('experimental', 'bundle2lazylocking'):
2415 2417 gettransaction()
2416 2418
2417 2419 op = bundle2.bundleoperation(repo, gettransaction,
2418 2420 captureoutput=captureoutput,
2419 2421 source='push')
2420 2422 try:
2421 2423 op = bundle2.processbundle(repo, cg, op=op)
2422 2424 finally:
2423 2425 r = op.reply
2424 2426 if captureoutput and r is not None:
2425 2427 repo.ui.pushbuffer(error=True, subproc=True)
2426 2428 def recordout(output):
2427 2429 r.newpart('output', data=output, mandatory=False)
2428 2430 if lockandtr[2] is not None:
2429 2431 lockandtr[2].close()
2430 2432 except BaseException as exc:
2431 2433 exc.duringunbundle2 = True
2432 2434 if captureoutput and r is not None:
2433 2435 parts = exc._bundle2salvagedoutput = r.salvageoutput()
2434 2436 def recordout(output):
2435 2437 part = bundle2.bundlepart('output', data=output,
2436 2438 mandatory=False)
2437 2439 parts.append(part)
2438 2440 raise
2439 2441 finally:
2440 2442 lockmod.release(lockandtr[2], lockandtr[1], lockandtr[0])
2441 2443 if recordout is not None:
2442 2444 recordout(repo.ui.popbuffer())
2443 2445 return r
2444 2446
2445 2447 def _maybeapplyclonebundle(pullop):
2446 2448 """Apply a clone bundle from a remote, if possible."""
2447 2449
2448 2450 repo = pullop.repo
2449 2451 remote = pullop.remote
2450 2452
2451 2453 if not repo.ui.configbool('ui', 'clonebundles'):
2452 2454 return
2453 2455
2454 2456 # Only run if local repo is empty.
2455 2457 if len(repo):
2456 2458 return
2457 2459
2458 2460 if pullop.heads:
2459 2461 return
2460 2462
2461 2463 if not remote.capable('clonebundles'):
2462 2464 return
2463 2465
2464 2466 with remote.commandexecutor() as e:
2465 2467 res = e.callcommand('clonebundles', {}).result()
2466 2468
2467 2469 # If we call the wire protocol command, that's good enough to record the
2468 2470 # attempt.
2469 2471 pullop.clonebundleattempted = True
2470 2472
2471 2473 entries = parseclonebundlesmanifest(repo, res)
2472 2474 if not entries:
2473 2475 repo.ui.note(_('no clone bundles available on remote; '
2474 2476 'falling back to regular clone\n'))
2475 2477 return
2476 2478
2477 2479 entries = filterclonebundleentries(
2478 2480 repo, entries, streamclonerequested=pullop.streamclonerequested)
2479 2481
2480 2482 if not entries:
2481 2483 # There is a thundering herd concern here. However, if a server
2482 2484 # operator doesn't advertise bundles appropriate for its clients,
2483 2485 # they deserve what's coming. Furthermore, from a client's
2484 2486 # perspective, no automatic fallback would mean not being able to
2485 2487 # clone!
2486 2488 repo.ui.warn(_('no compatible clone bundles available on server; '
2487 2489 'falling back to regular clone\n'))
2488 2490 repo.ui.warn(_('(you may want to report this to the server '
2489 2491 'operator)\n'))
2490 2492 return
2491 2493
2492 2494 entries = sortclonebundleentries(repo.ui, entries)
2493 2495
2494 2496 url = entries[0]['URL']
2495 2497 repo.ui.status(_('applying clone bundle from %s\n') % url)
2496 2498 if trypullbundlefromurl(repo.ui, repo, url):
2497 2499 repo.ui.status(_('finished applying clone bundle\n'))
2498 2500 # Bundle failed.
2499 2501 #
2500 2502 # We abort by default to avoid the thundering herd of
2501 2503 # clients flooding a server that was expecting expensive
2502 2504 # clone load to be offloaded.
2503 2505 elif repo.ui.configbool('ui', 'clonebundlefallback'):
2504 2506 repo.ui.warn(_('falling back to normal clone\n'))
2505 2507 else:
2506 2508 raise error.Abort(_('error applying bundle'),
2507 2509 hint=_('if this error persists, consider contacting '
2508 2510 'the server operator or disable clone '
2509 2511 'bundles via '
2510 2512 '"--config ui.clonebundles=false"'))
2511 2513
2512 2514 def parseclonebundlesmanifest(repo, s):
2513 2515 """Parses the raw text of a clone bundles manifest.
2514 2516
2515 2517 Returns a list of dicts. Each dict has a ``URL`` key holding the URL;
2516 2518 the remaining keys are the attributes for the entry.
2517 2519 """
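# A minimal sketch of the mapping performed below, using a hypothetical
# manifest line (URL and attribute values are made up):
#
#   https://example.com/full.hg BUNDLESPEC=gzip-v2 REQUIRESNI=true
#
# would parse to roughly
#
#   {'URL': 'https://example.com/full.hg',
#    'BUNDLESPEC': 'gzip-v2',
#    'COMPRESSION': <compression from the spec>,
#    'VERSION': <version from the spec>,
#    'REQUIRESNI': 'true'}
#
# assuming parsebundlespec() accepts the spec and the loop below fills in
# COMPRESSION and VERSION from it.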
2518 2520 m = []
2519 2521 for line in s.splitlines():
2520 2522 fields = line.split()
2521 2523 if not fields:
2522 2524 continue
2523 2525 attrs = {'URL': fields[0]}
2524 2526 for rawattr in fields[1:]:
2525 2527 key, value = rawattr.split('=', 1)
2526 2528 key = urlreq.unquote(key)
2527 2529 value = urlreq.unquote(value)
2528 2530 attrs[key] = value
2529 2531
2530 2532 # Parse BUNDLESPEC into components. This makes client-side
2531 2533 # preferences easier to specify since you can prefer a single
2532 2534 # component of the BUNDLESPEC.
2533 2535 if key == 'BUNDLESPEC':
2534 2536 try:
2535 2537 bundlespec = parsebundlespec(repo, value)
2536 2538 attrs['COMPRESSION'] = bundlespec.compression
2537 2539 attrs['VERSION'] = bundlespec.version
2538 2540 except error.InvalidBundleSpecification:
2539 2541 pass
2540 2542 except error.UnsupportedBundleSpecification:
2541 2543 pass
2542 2544
2543 2545 m.append(attrs)
2544 2546
2545 2547 return m
2546 2548
2547 2549 def isstreamclonespec(bundlespec):
2548 2550 # Stream clone v1
2549 2551 if (bundlespec.wirecompression == 'UN' and bundlespec.wireversion == 's1'):
2550 2552 return True
2551 2553
2552 2554 # Stream clone v2
2553 2555 if (bundlespec.wirecompression == 'UN' and
2554 2556 bundlespec.wireversion == '02' and
2555 2557 bundlespec.contentopts.get('streamv2')):
2556 2558 return True
2557 2559
2558 2560 return False
2559 2561
2560 2562 def filterclonebundleentries(repo, entries, streamclonerequested=False):
2561 2563 """Remove incompatible clone bundle manifest entries.
2562 2564
2563 2565 Accepts a list of entries parsed with ``parseclonebundlesmanifest``
2564 2566 and returns a new list consisting of only the entries that this client
2565 2567 should be able to apply.
2566 2568
2567 2569 There is no guarantee we'll be able to apply all returned entries because
2568 2570 the metadata we use to filter on may be missing or wrong.
2569 2571 """
2570 2572 newentries = []
2571 2573 for entry in entries:
2572 2574 spec = entry.get('BUNDLESPEC')
2573 2575 if spec:
2574 2576 try:
2575 2577 bundlespec = parsebundlespec(repo, spec, strict=True)
2576 2578
2577 2579 # If a stream clone was requested, filter out non-streamclone
2578 2580 # entries.
2579 2581 if streamclonerequested and not isstreamclonespec(bundlespec):
2580 2582 repo.ui.debug('filtering %s because not a stream clone\n' %
2581 2583 entry['URL'])
2582 2584 continue
2583 2585
2584 2586 except error.InvalidBundleSpecification as e:
2585 2587 repo.ui.debug(stringutil.forcebytestr(e) + '\n')
2586 2588 continue
2587 2589 except error.UnsupportedBundleSpecification as e:
2588 2590 repo.ui.debug('filtering %s because unsupported bundle '
2589 2591 'spec: %s\n' % (
2590 2592 entry['URL'], stringutil.forcebytestr(e)))
2591 2593 continue
2592 2594 # If we don't have a spec and requested a stream clone, we don't know
2593 2595 # what the entry is so don't attempt to apply it.
2594 2596 elif streamclonerequested:
2595 2597 repo.ui.debug('filtering %s because cannot determine if a stream '
2596 2598 'clone bundle\n' % entry['URL'])
2597 2599 continue
2598 2600
2599 2601 if 'REQUIRESNI' in entry and not sslutil.hassni:
2600 2602 repo.ui.debug('filtering %s because SNI not supported\n' %
2601 2603 entry['URL'])
2602 2604 continue
2603 2605
2604 2606 newentries.append(entry)
2605 2607
2606 2608 return newentries
2607 2609
2608 2610 class clonebundleentry(object):
2609 2611 """Represents an item in a clone bundles manifest.
2610 2612
2611 2613 This rich class is needed to support sorting since sorted() in Python 3
2612 2614 doesn't support ``cmp`` and our comparison is complex enough that ``key=``
2613 2615 won't work.
2614 2616 """
2615 2617
2616 2618 def __init__(self, value, prefers):
2617 2619 self.value = value
2618 2620 self.prefers = prefers
2619 2621
2620 2622 def _cmp(self, other):
2621 2623 for prefkey, prefvalue in self.prefers:
2622 2624 avalue = self.value.get(prefkey)
2623 2625 bvalue = other.value.get(prefkey)
2624 2626
2625 2627 # Special case: b is missing the attribute and a matches exactly.
2626 2628 if avalue is not None and bvalue is None and avalue == prefvalue:
2627 2629 return -1
2628 2630
2629 2631 # Special case: a is missing the attribute and b matches exactly.
2630 2632 if bvalue is not None and avalue is None and bvalue == prefvalue:
2631 2633 return 1
2632 2634
2633 2635 # We can't compare unless the attribute is present on both.
2634 2636 if avalue is None or bvalue is None:
2635 2637 continue
2636 2638
2637 2639 # Same values should fall back to next attribute.
2638 2640 if avalue == bvalue:
2639 2641 continue
2640 2642
2641 2643 # Exact matches come first.
2642 2644 if avalue == prefvalue:
2643 2645 return -1
2644 2646 if bvalue == prefvalue:
2645 2647 return 1
2646 2648
2647 2649 # Fall back to next attribute.
2648 2650 continue
2649 2651
2650 2652 # If we got here we couldn't sort by attributes and prefers. Fall
2651 2653 # back to index order.
2652 2654 return 0
2653 2655
2654 2656 def __lt__(self, other):
2655 2657 return self._cmp(other) < 0
2656 2658
2657 2659 def __gt__(self, other):
2658 2660 return self._cmp(other) > 0
2659 2661
2660 2662 def __eq__(self, other):
2661 2663 return self._cmp(other) == 0
2662 2664
2663 2665 def __le__(self, other):
2664 2666 return self._cmp(other) <= 0
2665 2667
2666 2668 def __ge__(self, other):
2667 2669 return self._cmp(other) >= 0
2668 2670
2669 2671 def __ne__(self, other):
2670 2672 return self._cmp(other) != 0
2671 2673
2672 2674 def sortclonebundleentries(ui, entries):
2673 2675 prefers = ui.configlist('ui', 'clonebundleprefers')
2674 2676 if not prefers:
2675 2677 return list(entries)
2676 2678
2677 2679 prefers = [p.split('=', 1) for p in prefers]
2678 2680
2679 2681 items = sorted(clonebundleentry(v, prefers) for v in entries)
2680 2682 return [i.value for i in items]
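# A hedged usage sketch (configuration values are illustrative only): with
#
#   [ui]
#   clonebundleprefers = COMPRESSION=zstd, VERSION=v2
#
# entries whose COMPRESSION attribute equals 'zstd' sort ahead of the rest,
# ties are then broken by VERSION, and entries that remain indistinguishable
# keep their manifest order, per clonebundleentry._cmp() above.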
2681 2683
2682 2684 def trypullbundlefromurl(ui, repo, url):
2683 2685 """Attempt to apply a bundle from a URL."""
2684 2686 with repo.lock(), repo.transaction('bundleurl') as tr:
2685 2687 try:
2686 2688 fh = urlmod.open(ui, url)
2687 2689 cg = readbundle(ui, fh, 'stream')
2688 2690
2689 2691 if isinstance(cg, streamclone.streamcloneapplier):
2690 2692 cg.apply(repo)
2691 2693 else:
2692 2694 bundle2.applybundle(repo, cg, tr, 'clonebundles', url)
2693 2695 return True
2694 2696 except urlerr.httperror as e:
2695 2697 ui.warn(_('HTTP error fetching bundle: %s\n') %
2696 2698 stringutil.forcebytestr(e))
2697 2699 except urlerr.urlerror as e:
2698 2700 ui.warn(_('error fetching bundle: %s\n') %
2699 2701 stringutil.forcebytestr(e.reason))
2700 2702
2701 2703 return False
@@ -1,697 +1,699
1 1 # exchangev2.py - repository exchange for wire protocol version 2
2 2 #
3 3 # Copyright 2018 Gregory Szorc <gregory.szorc@gmail.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import collections
11 11 import weakref
12 12
13 13 from .i18n import _
14 14 from .node import (
15 15 nullid,
16 16 short,
17 17 )
18 18 from . import (
19 19 bookmarks,
20 20 error,
21 21 mdiff,
22 22 narrowspec,
23 23 phases,
24 24 pycompat,
25 setdiscovery,
26 )
27 from .interfaces import (
25 28 repository,
26 setdiscovery,
27 29 )
28 30
29 31 def pull(pullop):
30 32 """Pull using wire protocol version 2."""
31 33 repo = pullop.repo
32 34 remote = pullop.remote
33 35
34 36 usingrawchangelogandmanifest = _checkuserawstorefiledata(pullop)
35 37
36 38 # If this is a clone and it was requested to perform a "stream clone",
37 39 # we obtain the raw files data from the remote then fall back to an
38 40 # incremental pull. This is somewhat hacky and is not nearly robust enough
39 41 # for long-term usage.
40 42 if usingrawchangelogandmanifest:
41 43 with repo.transaction('clone'):
42 44 _fetchrawstorefiles(repo, remote)
43 45 repo.invalidate(clearfilecache=True)
44 46
45 47 tr = pullop.trmanager.transaction()
46 48
47 49 # We don't use the repo's narrow matcher here because the patterns passed
48 50 # to exchange.pull() could be different.
49 51 narrowmatcher = narrowspec.match(repo.root,
50 52 # Empty maps to nevermatcher. So always
51 53 # set includes if missing.
52 54 pullop.includepats or {'path:.'},
53 55 pullop.excludepats)
54 56
55 57 if pullop.includepats or pullop.excludepats:
56 58 pathfilter = {}
57 59 if pullop.includepats:
58 60 pathfilter[b'include'] = sorted(pullop.includepats)
59 61 if pullop.excludepats:
60 62 pathfilter[b'exclude'] = sorted(pullop.excludepats)
61 63 else:
62 64 pathfilter = None
63 65
64 66 # Figure out what needs to be fetched.
65 67 common, fetch, remoteheads = _pullchangesetdiscovery(
66 68 repo, remote, pullop.heads, abortwhenunrelated=pullop.force)
67 69
68 70 # And fetch the data.
69 71 pullheads = pullop.heads or remoteheads
70 72 csetres = _fetchchangesets(repo, tr, remote, common, fetch, pullheads)
71 73
72 74 # New revisions are written to the changelog. But all other updates
73 75 # are deferred. Do those now.
74 76
75 77 # Ensure all new changesets are draft by default. If the repo is
76 78 # publishing, the phase will be adjusted by the loop below.
77 79 if csetres['added']:
78 80 phases.registernew(repo, tr, phases.draft, csetres['added'])
79 81
80 82 # And adjust the phase of all changesets accordingly.
81 83 for phase in phases.phasenames:
82 84 if phase == b'secret' or not csetres['nodesbyphase'][phase]:
83 85 continue
84 86
85 87 phases.advanceboundary(repo, tr, phases.phasenames.index(phase),
86 88 csetres['nodesbyphase'][phase])
87 89
88 90 # Write bookmark updates.
89 91 bookmarks.updatefromremote(repo.ui, repo, csetres['bookmarks'],
90 92 remote.url(), pullop.gettransaction,
91 93 explicit=pullop.explicitbookmarks)
92 94
93 95 manres = _fetchmanifests(repo, tr, remote, csetres['manifestnodes'])
94 96
95 97 # We don't properly support shallow changeset and manifest yet. So we apply
96 98 # depth limiting locally.
97 99 if pullop.depth:
98 100 relevantcsetnodes = set()
99 101 clnode = repo.changelog.node
100 102
101 103 for rev in repo.revs(b'ancestors(%ln, %s)',
102 104 pullheads, pullop.depth - 1):
103 105 relevantcsetnodes.add(clnode(rev))
104 106
105 107 csetrelevantfilter = lambda n: n in relevantcsetnodes
106 108
107 109 else:
108 110 csetrelevantfilter = lambda n: True
109 111
110 112 # If obtaining the raw store files, we need to scan the full repo to
111 113 # derive all the changesets, manifests, and linkrevs.
112 114 if usingrawchangelogandmanifest:
113 115 csetsforfiles = []
114 116 mnodesforfiles = []
115 117 manifestlinkrevs = {}
116 118
117 119 for rev in repo:
118 120 ctx = repo[rev]
119 121 node = ctx.node()
120 122
121 123 if not csetrelevantfilter(node):
122 124 continue
123 125
124 126 mnode = ctx.manifestnode()
125 127
126 128 csetsforfiles.append(node)
127 129 mnodesforfiles.append(mnode)
128 130 manifestlinkrevs[mnode] = rev
129 131
130 132 else:
131 133 csetsforfiles = [n for n in csetres['added'] if csetrelevantfilter(n)]
132 134 mnodesforfiles = manres['added']
133 135 manifestlinkrevs = manres['linkrevs']
134 136
135 137 # Find all file nodes referenced by added manifests and fetch those
136 138 # revisions.
137 139 fnodes = _derivefilesfrommanifests(repo, narrowmatcher, mnodesforfiles)
138 140 _fetchfilesfromcsets(repo, tr, remote, pathfilter, fnodes, csetsforfiles,
139 141 manifestlinkrevs, shallow=bool(pullop.depth))
140 142
141 143 def _checkuserawstorefiledata(pullop):
142 144 """Check whether we should use rawstorefiledata command to retrieve data."""
143 145
144 146 repo = pullop.repo
145 147 remote = pullop.remote
146 148
147 149 # Command to obtain raw store data isn't available.
148 150 if b'rawstorefiledata' not in remote.apidescriptor[b'commands']:
149 151 return False
150 152
151 153 # Only honor if user requested stream clone operation.
152 154 if not pullop.streamclonerequested:
153 155 return False
154 156
155 157 # Only works on empty repos.
156 158 if len(repo):
157 159 return False
158 160
159 161 # TODO This is super hacky. There needs to be a storage API for this. We
160 162 # also need to check for compatibility with the remote.
161 163 if b'revlogv1' not in repo.requirements:
162 164 return False
163 165
164 166 return True
165 167
166 168 def _fetchrawstorefiles(repo, remote):
167 169 with remote.commandexecutor() as e:
168 170 objs = e.callcommand(b'rawstorefiledata', {
169 171 b'files': [b'changelog', b'manifestlog'],
170 172 }).result()
171 173
172 174 # First object is a summary of files data that follows.
173 175 overall = next(objs)
174 176
175 177 progress = repo.ui.makeprogress(_('clone'), total=overall[b'totalsize'],
176 178 unit=_('bytes'))
177 179 with progress:
178 180 progress.update(0)
179 181
180 182 # Next are pairs of file metadata, data.
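# A rough sketch of the expected stream shape (values illustrative only):
# a metadata mapping such as
#   {b'location': b'store', b'path': b'00changelog.i', b'size': 12345}
# followed by one or more bytestring chunks totalling ``size`` bytes, the
# last of which is flagged via ``islast``.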
181 183 while True:
182 184 try:
183 185 filemeta = next(objs)
184 186 except StopIteration:
185 187 break
186 188
187 189 for k in (b'location', b'path', b'size'):
188 190 if k not in filemeta:
189 191 raise error.Abort(_(b'remote file data missing key: %s')
190 192 % k)
191 193
192 194 if filemeta[b'location'] == b'store':
193 195 vfs = repo.svfs
194 196 else:
195 197 raise error.Abort(_(b'invalid location for raw file data: '
196 198 b'%s') % filemeta[b'location'])
197 199
198 200 bytesremaining = filemeta[b'size']
199 201
200 202 with vfs.open(filemeta[b'path'], b'wb') as fh:
201 203 while True:
202 204 try:
203 205 chunk = next(objs)
204 206 except StopIteration:
205 207 break
206 208
207 209 bytesremaining -= len(chunk)
208 210
209 211 if bytesremaining < 0:
210 212 raise error.Abort(_(
211 213 b'received invalid number of bytes for file '
212 214 b'data; expected %d, got extra') %
213 215 filemeta[b'size'])
214 216
215 217 progress.increment(step=len(chunk))
216 218 fh.write(chunk)
217 219
218 220 try:
219 221 if chunk.islast:
220 222 break
221 223 except AttributeError:
222 224 raise error.Abort(_(
223 225 b'did not receive indefinite length bytestring '
224 226 b'for file data'))
225 227
226 228 if bytesremaining:
227 229 raise error.Abort(_(b'received invalid number of bytes for '
228 230 b'file data; expected %d, got %d') %
229 231 (filemeta[b'size'],
230 232 filemeta[b'size'] - bytesremaining))
231 233
232 234 def _pullchangesetdiscovery(repo, remote, heads, abortwhenunrelated=True):
233 235 """Determine which changesets need to be pulled."""
234 236
235 237 if heads:
236 238 knownnode = repo.changelog.hasnode
237 239 if all(knownnode(head) for head in heads):
238 240 return heads, False, heads
239 241
240 242 # TODO wire protocol version 2 is capable of more efficient discovery
241 243 # than setdiscovery. Consider implementing something better.
242 244 common, fetch, remoteheads = setdiscovery.findcommonheads(
243 245 repo.ui, repo, remote, abortwhenunrelated=abortwhenunrelated)
244 246
245 247 common = set(common)
246 248 remoteheads = set(remoteheads)
247 249
248 250 # If a remote head is filtered locally, put it back in the common set.
249 251 # See the comment in exchange._pulldiscoverychangegroup() for more.
250 252
251 253 if fetch and remoteheads:
252 254 nodemap = repo.unfiltered().changelog.nodemap
253 255
254 256 common |= {head for head in remoteheads if head in nodemap}
255 257
256 258 if set(remoteheads).issubset(common):
257 259 fetch = []
258 260
259 261 common.discard(nullid)
260 262
261 263 return common, fetch, remoteheads
262 264
263 265 def _fetchchangesets(repo, tr, remote, common, fetch, remoteheads):
264 266 # TODO consider adding a step here where we obtain the DAG shape first
265 267 # (or ask the server to slice changesets into chunks for us) so that
266 268 # we can perform multiple fetches in batches. This will facilitate
267 269 # resuming interrupted clones, higher server-side cache hit rates due
268 270 # to smaller segments, etc.
269 271 with remote.commandexecutor() as e:
270 272 objs = e.callcommand(b'changesetdata', {
271 273 b'revisions': [{
272 274 b'type': b'changesetdagrange',
273 275 b'roots': sorted(common),
274 276 b'heads': sorted(remoteheads),
275 277 }],
276 278 b'fields': {b'bookmarks', b'parents', b'phase', b'revision'},
277 279 }).result()
278 280
279 281 # The context manager waits on all response data when exiting. So
280 282 # we need to remain in the context manager in order to stream data.
281 283 return _processchangesetdata(repo, tr, objs)
282 284
283 285 def _processchangesetdata(repo, tr, objs):
284 286 repo.hook('prechangegroup', throw=True,
285 287 **pycompat.strkwargs(tr.hookargs))
286 288
287 289 urepo = repo.unfiltered()
288 290 cl = urepo.changelog
289 291
290 292 cl.delayupdate(tr)
291 293
292 294 # The first emitted object is a header describing the data that
293 295 # follows.
294 296 meta = next(objs)
295 297
296 298 progress = repo.ui.makeprogress(_('changesets'),
297 299 unit=_('chunks'),
298 300 total=meta.get(b'totalitems'))
299 301
300 302 manifestnodes = {}
301 303
302 304 def linkrev(node):
303 305 repo.ui.debug('add changeset %s\n' % short(node))
304 306 # Linkrev for changelog is always self.
305 307 return len(cl)
306 308
307 309 def onchangeset(cl, node):
308 310 progress.increment()
309 311
310 312 revision = cl.changelogrevision(node)
311 313
312 314 # We need to preserve the mapping of changelog revision to node
313 315 # so we can set the linkrev accordingly when manifests are added.
314 316 manifestnodes[cl.rev(node)] = revision.manifest
315 317
316 318 nodesbyphase = {phase: set() for phase in phases.phasenames}
317 319 remotebookmarks = {}
318 320
319 321 # addgroup() expects a 7-tuple describing revisions. This normalizes
320 322 # the wire data to that format.
321 323 #
322 324 # This loop also aggregates non-revision metadata, such as phase
323 325 # data.
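# For reference, the 7-tuple yielded below is, in order: node, p1, p2,
# linknode, delta base, delta data, and flags (this merely restates the
# yield statement in iterrevisions()).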
324 326 def iterrevisions():
325 327 for cset in objs:
326 328 node = cset[b'node']
327 329
328 330 if b'phase' in cset:
329 331 nodesbyphase[cset[b'phase']].add(node)
330 332
331 333 for mark in cset.get(b'bookmarks', []):
332 334 remotebookmarks[mark] = node
333 335
334 336 # TODO add mechanism for extensions to examine records so they
335 337 # can siphon off custom data fields.
336 338
337 339 extrafields = {}
338 340
339 341 for field, size in cset.get(b'fieldsfollowing', []):
340 342 extrafields[field] = next(objs)
341 343
342 344 # Some entries might be metadata-only updates.
343 345 if b'revision' not in extrafields:
344 346 continue
345 347
346 348 data = extrafields[b'revision']
347 349
348 350 yield (
349 351 node,
350 352 cset[b'parents'][0],
351 353 cset[b'parents'][1],
352 354 # Linknode is always itself for changesets.
353 355 cset[b'node'],
354 356 # We always send full revisions. So delta base is not set.
355 357 nullid,
356 358 mdiff.trivialdiffheader(len(data)) + data,
357 359 # Flags not yet supported.
358 360 0,
359 361 )
360 362
361 363 added = cl.addgroup(iterrevisions(), linkrev, weakref.proxy(tr),
362 364 addrevisioncb=onchangeset)
363 365
364 366 progress.complete()
365 367
366 368 return {
367 369 'added': added,
368 370 'nodesbyphase': nodesbyphase,
369 371 'bookmarks': remotebookmarks,
370 372 'manifestnodes': manifestnodes,
371 373 }
372 374
373 375 def _fetchmanifests(repo, tr, remote, manifestnodes):
374 376 rootmanifest = repo.manifestlog.getstorage(b'')
375 377
376 378 # Some manifests can be shared between changesets. Filter out revisions
377 379 # we already know about.
378 380 fetchnodes = []
379 381 linkrevs = {}
380 382 seen = set()
381 383
382 384 for clrev, node in sorted(manifestnodes.iteritems()):
383 385 if node in seen:
384 386 continue
385 387
386 388 try:
387 389 rootmanifest.rev(node)
388 390 except error.LookupError:
389 391 fetchnodes.append(node)
390 392 linkrevs[node] = clrev
391 393
392 394 seen.add(node)
393 395
394 396 # TODO handle tree manifests
395 397
396 398 # addgroup() expects a 7-tuple describing revisions. This normalizes
397 399 # the wire data to that format.
398 400 def iterrevisions(objs, progress):
399 401 for manifest in objs:
400 402 node = manifest[b'node']
401 403
402 404 extrafields = {}
403 405
404 406 for field, size in manifest.get(b'fieldsfollowing', []):
405 407 extrafields[field] = next(objs)
406 408
407 409 if b'delta' in extrafields:
408 410 basenode = manifest[b'deltabasenode']
409 411 delta = extrafields[b'delta']
410 412 elif b'revision' in extrafields:
411 413 basenode = nullid
412 414 revision = extrafields[b'revision']
413 415 delta = mdiff.trivialdiffheader(len(revision)) + revision
414 416 else:
415 417 continue
416 418
417 419 yield (
418 420 node,
419 421 manifest[b'parents'][0],
420 422 manifest[b'parents'][1],
421 423 # The value passed in is passed to the lookup function passed
422 424 # to addgroup(). We already have a map of manifest node to
423 425 # changelog revision number. So we just pass in the
424 426 # manifest node here and use linkrevs.__getitem__ as the
425 427 # resolution function.
426 428 node,
427 429 basenode,
428 430 delta,
429 431 # Flags not yet supported.
430 432 0
431 433 )
432 434
433 435 progress.increment()
434 436
435 437 progress = repo.ui.makeprogress(_('manifests'), unit=_('chunks'),
436 438 total=len(fetchnodes))
437 439
438 440 commandmeta = remote.apidescriptor[b'commands'][b'manifestdata']
439 441 batchsize = commandmeta.get(b'recommendedbatchsize', 10000)
440 442 # TODO make size configurable on client?
441 443
442 444 # We send commands one at a time to the remote. This is not the most
443 445 # efficient because we incur a round trip at the end of each batch.
444 446 # However, the existing frame-based reactor keeps consuming server
445 447 # data in the background. And this results in response data buffering
446 448 # in memory. This can consume gigabytes of memory.
447 449 # TODO send multiple commands in a request once background buffering
448 450 # issues are resolved.
449 451
450 452 added = []
451 453
452 454 for i in pycompat.xrange(0, len(fetchnodes), batchsize):
453 455 batch = [node for node in fetchnodes[i:i + batchsize]]
454 456 if not batch:
455 457 continue
456 458
457 459 with remote.commandexecutor() as e:
458 460 objs = e.callcommand(b'manifestdata', {
459 461 b'tree': b'',
460 462 b'nodes': batch,
461 463 b'fields': {b'parents', b'revision'},
462 464 b'haveparents': True,
463 465 }).result()
464 466
465 467 # Chomp off header object.
466 468 next(objs)
467 469
468 470 added.extend(rootmanifest.addgroup(
469 471 iterrevisions(objs, progress),
470 472 linkrevs.__getitem__,
471 473 weakref.proxy(tr)))
472 474
473 475 progress.complete()
474 476
475 477 return {
476 478 'added': added,
477 479 'linkrevs': linkrevs,
478 480 }
479 481
480 482 def _derivefilesfrommanifests(repo, matcher, manifestnodes):
481 483 """Determine what file nodes are relevant given a set of manifest nodes.
482 484
483 485 Returns a dict mapping file paths to dicts of file node to first manifest
484 486 node.
485 487 """
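# Shape sketch (hypothetical values): the result looks like
#   {b'foo/bar.txt': {<file node>: <first manifest node seen for it>}, ...}
# i.e. fnodes[path][filenode] remembers the first manifest that referenced
# that file node, per the setdefault() call below.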
486 488 ml = repo.manifestlog
487 489 fnodes = collections.defaultdict(dict)
488 490
489 491 progress = repo.ui.makeprogress(
490 492 _('scanning manifests'), total=len(manifestnodes))
491 493
492 494 with progress:
493 495 for manifestnode in manifestnodes:
494 496 m = ml.get(b'', manifestnode)
495 497
496 498 # TODO this will pull in unwanted nodes because it takes the storage
497 499 # delta into consideration. What we really want is something that
498 500 # takes the delta between the manifest's parents. And ideally we
499 501 # would ignore file nodes that are known locally. For now, ignore
500 502 # both these limitations. This will result in incremental fetches
501 503 # requesting data we already have. So this is far from ideal.
502 504 md = m.readfast()
503 505
504 506 for path, fnode in md.items():
505 507 if matcher(path):
506 508 fnodes[path].setdefault(fnode, manifestnode)
507 509
508 510 progress.increment()
509 511
510 512 return fnodes
511 513
512 514 def _fetchfiles(repo, tr, remote, fnodes, linkrevs):
513 515 """Fetch file data from explicit file revisions."""
514 516 def iterrevisions(objs, progress):
515 517 for filerevision in objs:
516 518 node = filerevision[b'node']
517 519
518 520 extrafields = {}
519 521
520 522 for field, size in filerevision.get(b'fieldsfollowing', []):
521 523 extrafields[field] = next(objs)
522 524
523 525 if b'delta' in extrafields:
524 526 basenode = filerevision[b'deltabasenode']
525 527 delta = extrafields[b'delta']
526 528 elif b'revision' in extrafields:
527 529 basenode = nullid
528 530 revision = extrafields[b'revision']
529 531 delta = mdiff.trivialdiffheader(len(revision)) + revision
530 532 else:
531 533 continue
532 534
533 535 yield (
534 536 node,
535 537 filerevision[b'parents'][0],
536 538 filerevision[b'parents'][1],
537 539 node,
538 540 basenode,
539 541 delta,
540 542 # Flags not yet supported.
541 543 0,
542 544 )
543 545
544 546 progress.increment()
545 547
546 548 progress = repo.ui.makeprogress(
547 549 _('files'), unit=_('chunks'),
548 550 total=sum(len(v) for v in fnodes.itervalues()))
549 551
550 552 # TODO make batch size configurable
551 553 batchsize = 10000
552 554 fnodeslist = [x for x in sorted(fnodes.items())]
553 555
554 556 for i in pycompat.xrange(0, len(fnodeslist), batchsize):
555 557 batch = [x for x in fnodeslist[i:i + batchsize]]
556 558 if not batch:
557 559 continue
558 560
559 561 with remote.commandexecutor() as e:
560 562 fs = []
561 563 locallinkrevs = {}
562 564
563 565 for path, nodes in batch:
564 566 fs.append((path, e.callcommand(b'filedata', {
565 567 b'path': path,
566 568 b'nodes': sorted(nodes),
567 569 b'fields': {b'parents', b'revision'},
568 570 b'haveparents': True,
569 571 })))
570 572
571 573 locallinkrevs[path] = {
572 574 node: linkrevs[manifestnode]
573 575 for node, manifestnode in nodes.iteritems()}
574 576
575 577 for path, f in fs:
576 578 objs = f.result()
577 579
578 580 # Chomp off header objects.
579 581 next(objs)
580 582
581 583 store = repo.file(path)
582 584 store.addgroup(
583 585 iterrevisions(objs, progress),
584 586 locallinkrevs[path].__getitem__,
585 587 weakref.proxy(tr))
586 588
587 589 def _fetchfilesfromcsets(repo, tr, remote, pathfilter, fnodes, csets,
588 590 manlinkrevs, shallow=False):
589 591 """Fetch file data from explicit changeset revisions."""
590 592
591 593 def iterrevisions(objs, remaining, progress):
592 594 while remaining:
593 595 filerevision = next(objs)
594 596
595 597 node = filerevision[b'node']
596 598
597 599 extrafields = {}
598 600
599 601 for field, size in filerevision.get(b'fieldsfollowing', []):
600 602 extrafields[field] = next(objs)
601 603
602 604 if b'delta' in extrafields:
603 605 basenode = filerevision[b'deltabasenode']
604 606 delta = extrafields[b'delta']
605 607 elif b'revision' in extrafields:
606 608 basenode = nullid
607 609 revision = extrafields[b'revision']
608 610 delta = mdiff.trivialdiffheader(len(revision)) + revision
609 611 else:
610 612 continue
611 613
612 614 if b'linknode' in filerevision:
613 615 linknode = filerevision[b'linknode']
614 616 else:
615 617 linknode = node
616 618
617 619 yield (
618 620 node,
619 621 filerevision[b'parents'][0],
620 622 filerevision[b'parents'][1],
621 623 linknode,
622 624 basenode,
623 625 delta,
624 626 # Flags not yet supported.
625 627 0,
626 628 )
627 629
628 630 progress.increment()
629 631 remaining -= 1
630 632
631 633 progress = repo.ui.makeprogress(
632 634 _('files'), unit=_('chunks'),
633 635 total=sum(len(v) for v in fnodes.itervalues()))
634 636
635 637 commandmeta = remote.apidescriptor[b'commands'][b'filesdata']
636 638 batchsize = commandmeta.get(b'recommendedbatchsize', 50000)
637 639
638 640 shallowfiles = repository.REPO_FEATURE_SHALLOW_FILE_STORAGE in repo.features
639 641 fields = {b'parents', b'revision'}
640 642 clrev = repo.changelog.rev
641 643
642 644 # There are no guarantees that we'll have ancestor revisions if
643 645 # a) this repo has shallow file storage or b) shallow data fetching is enabled.
644 646 # Force remote to not delta against possibly unknown revisions when these
645 647 # conditions hold.
646 648 haveparents = not (shallowfiles or shallow)
647 649
648 650 # Similarly, we may not have calculated linkrevs for all incoming file
649 651 # revisions. Ask the remote to do work for us in this case.
650 652 if not haveparents:
651 653 fields.add(b'linknode')
652 654
653 655 for i in pycompat.xrange(0, len(csets), batchsize):
654 656 batch = [x for x in csets[i:i + batchsize]]
655 657 if not batch:
656 658 continue
657 659
658 660 with remote.commandexecutor() as e:
659 661 args = {
660 662 b'revisions': [{
661 663 b'type': b'changesetexplicit',
662 664 b'nodes': batch,
663 665 }],
664 666 b'fields': fields,
665 667 b'haveparents': haveparents,
666 668 }
667 669
668 670 if pathfilter:
669 671 args[b'pathfilter'] = pathfilter
670 672
671 673 objs = e.callcommand(b'filesdata', args).result()
672 674
673 675 # First object is an overall header.
674 676 overall = next(objs)
675 677
676 678 # We have overall['totalpaths'] segments.
677 679 for i in pycompat.xrange(overall[b'totalpaths']):
678 680 header = next(objs)
679 681
680 682 path = header[b'path']
681 683 store = repo.file(path)
682 684
683 685 linkrevs = {
684 686 fnode: manlinkrevs[mnode]
685 687 for fnode, mnode in fnodes[path].iteritems()}
686 688
687 689 def getlinkrev(node):
688 690 if node in linkrevs:
689 691 return linkrevs[node]
690 692 else:
691 693 return clrev(node)
692 694
693 695 store.addgroup(iterrevisions(objs, header[b'totalitems'],
694 696 progress),
695 697 getlinkrev,
696 698 weakref.proxy(tr),
697 699 maybemissingparents=shallow)
@@ -1,242 +1,244
1 1 # filelog.py - file history class for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 from .i18n import _
11 11 from .node import (
12 12 nullid,
13 13 nullrev,
14 14 )
15 15 from . import (
16 16 error,
17 revlog,
18 )
19 from .interfaces import (
17 20 repository,
18 revlog,
19 21 )
20 22 from .utils import (
21 23 interfaceutil,
22 24 storageutil,
23 25 )
24 26
25 27 @interfaceutil.implementer(repository.ifilestorage)
26 28 class filelog(object):
27 29 def __init__(self, opener, path):
28 30 self._revlog = revlog.revlog(opener,
29 31 '/'.join(('data', path + '.i')),
30 32 censorable=True)
31 33 # Full name of the user visible file, relative to the repository root.
32 34 # Used by LFS.
33 35 self._revlog.filename = path
34 36
35 37 def __len__(self):
36 38 return len(self._revlog)
37 39
38 40 def __iter__(self):
39 41 return self._revlog.__iter__()
40 42
41 43 def hasnode(self, node):
42 44 if node in (nullid, nullrev):
43 45 return False
44 46
45 47 try:
46 48 self._revlog.rev(node)
47 49 return True
48 50 except (TypeError, ValueError, IndexError, error.LookupError):
49 51 return False
50 52
51 53 def revs(self, start=0, stop=None):
52 54 return self._revlog.revs(start=start, stop=stop)
53 55
54 56 def parents(self, node):
55 57 return self._revlog.parents(node)
56 58
57 59 def parentrevs(self, rev):
58 60 return self._revlog.parentrevs(rev)
59 61
60 62 def rev(self, node):
61 63 return self._revlog.rev(node)
62 64
63 65 def node(self, rev):
64 66 return self._revlog.node(rev)
65 67
66 68 def lookup(self, node):
67 69 return storageutil.fileidlookup(self._revlog, node,
68 70 self._revlog.indexfile)
69 71
70 72 def linkrev(self, rev):
71 73 return self._revlog.linkrev(rev)
72 74
73 75 def commonancestorsheads(self, node1, node2):
74 76 return self._revlog.commonancestorsheads(node1, node2)
75 77
76 78 # Used by dagop.blockdescendants().
77 79 def descendants(self, revs):
78 80 return self._revlog.descendants(revs)
79 81
80 82 def heads(self, start=None, stop=None):
81 83 return self._revlog.heads(start, stop)
82 84
83 85 # Used by hgweb, children extension.
84 86 def children(self, node):
85 87 return self._revlog.children(node)
86 88
87 89 def iscensored(self, rev):
88 90 return self._revlog.iscensored(rev)
89 91
90 92 def revision(self, node, _df=None, raw=False):
91 93 return self._revlog.revision(node, _df=_df, raw=raw)
92 94
93 95 def rawdata(self, node, _df=None):
94 96 return self._revlog.rawdata(node, _df=_df)
95 97
96 98 def emitrevisions(self, nodes, nodesorder=None,
97 99 revisiondata=False, assumehaveparentrevisions=False,
98 100 deltamode=repository.CG_DELTAMODE_STD):
99 101 return self._revlog.emitrevisions(
100 102 nodes, nodesorder=nodesorder, revisiondata=revisiondata,
101 103 assumehaveparentrevisions=assumehaveparentrevisions,
102 104 deltamode=deltamode)
103 105
104 106 def addrevision(self, revisiondata, transaction, linkrev, p1, p2,
105 107 node=None, flags=revlog.REVIDX_DEFAULT_FLAGS,
106 108 cachedelta=None):
107 109 return self._revlog.addrevision(revisiondata, transaction, linkrev,
108 110 p1, p2, node=node, flags=flags,
109 111 cachedelta=cachedelta)
110 112
111 113 def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None,
112 114 maybemissingparents=False):
113 115 if maybemissingparents:
114 116 raise error.Abort(_('revlog storage does not support missing '
115 117 'parents write mode'))
116 118
117 119 return self._revlog.addgroup(deltas, linkmapper, transaction,
118 120 addrevisioncb=addrevisioncb)
119 121
120 122 def getstrippoint(self, minlink):
121 123 return self._revlog.getstrippoint(minlink)
122 124
123 125 def strip(self, minlink, transaction):
124 126 return self._revlog.strip(minlink, transaction)
125 127
126 128 def censorrevision(self, tr, node, tombstone=b''):
127 129 return self._revlog.censorrevision(tr, node, tombstone=tombstone)
128 130
129 131 def files(self):
130 132 return self._revlog.files()
131 133
132 134 def read(self, node):
133 135 return storageutil.filtermetadata(self.revision(node))
134 136
135 137 def add(self, text, meta, transaction, link, p1=None, p2=None):
136 138 if meta or text.startswith('\1\n'):
137 139 text = storageutil.packmeta(meta, text)
138 140 return self.addrevision(text, transaction, link, p1, p2)
139 141
140 142 def renamed(self, node):
141 143 return storageutil.filerevisioncopied(self, node)
142 144
143 145 def size(self, rev):
144 146 """return the size of a given revision"""
145 147
146 148 # for revisions with renames, we have to go the slow way
147 149 node = self.node(rev)
148 150 if self.renamed(node):
149 151 return len(self.read(node))
150 152 if self.iscensored(rev):
151 153 return 0
152 154
153 155 # XXX if self.read(node).startswith("\1\n"), this returns (size+4)
154 156 return self._revlog.size(rev)
155 157
156 158 def cmp(self, node, text):
157 159 """compare text with a given file revision
158 160
159 161 returns True if text is different than what is stored.
160 162 """
161 163 return not storageutil.filedataequivalent(self, node, text)
162 164
163 165 def verifyintegrity(self, state):
164 166 return self._revlog.verifyintegrity(state)
165 167
166 168 def storageinfo(self, exclusivefiles=False, sharedfiles=False,
167 169 revisionscount=False, trackedsize=False,
168 170 storedsize=False):
169 171 return self._revlog.storageinfo(
170 172 exclusivefiles=exclusivefiles, sharedfiles=sharedfiles,
171 173 revisionscount=revisionscount, trackedsize=trackedsize,
172 174 storedsize=storedsize)
173 175
174 176 # TODO these aren't part of the interface and aren't internal methods.
175 177 # Callers should be fixed to not use them.
176 178
177 179 # Used by bundlefilelog, unionfilelog.
178 180 @property
179 181 def indexfile(self):
180 182 return self._revlog.indexfile
181 183
182 184 @indexfile.setter
183 185 def indexfile(self, value):
184 186 self._revlog.indexfile = value
185 187
186 188 # Used by repo upgrade.
187 189 def clone(self, tr, destrevlog, **kwargs):
188 190 if not isinstance(destrevlog, filelog):
189 191 raise error.ProgrammingError('expected filelog to clone()')
190 192
191 193 return self._revlog.clone(tr, destrevlog._revlog, **kwargs)
192 194
193 195 class narrowfilelog(filelog):
194 196 """Filelog variation to be used with narrow stores."""
195 197
196 198 def __init__(self, opener, path, narrowmatch):
197 199 super(narrowfilelog, self).__init__(opener, path)
198 200 self._narrowmatch = narrowmatch
199 201
200 202 def renamed(self, node):
201 203 res = super(narrowfilelog, self).renamed(node)
202 204
203 205 # Renames that come from outside the narrowspec are problematic
204 206 # because we may lack the base text for the rename. This can result
205 207 # in code attempting to walk the ancestry or compute a diff
206 208 # encountering a missing revision. We address this by silently
207 209 # removing rename metadata if the source file is outside the
208 210 # narrow spec.
209 211 #
210 212 # A better solution would be to see if the base revision is available,
211 213 # rather than assuming it isn't.
212 214 #
213 215 # An even better solution would be to teach all consumers of rename
214 216 # metadata that the base revision may not be available.
215 217 #
216 218 # TODO consider better ways of doing this.
217 219 if res and not self._narrowmatch(res[0]):
218 220 return None
219 221
220 222 return res
221 223
222 224 def size(self, rev):
223 225 # Because we have a custom renamed() that may lie, we need to call
224 226 # the base renamed() to report accurate results.
225 227 node = self.node(rev)
226 228 if super(narrowfilelog, self).renamed(node):
227 229 return len(self.read(node))
228 230 else:
229 231 return super(narrowfilelog, self).size(rev)
230 232
231 233 def cmp(self, node, text):
232 234 different = super(narrowfilelog, self).cmp(node, text)
233 235
234 236 # Because renamed() may lie, we may get false positives for
235 237 # different content. Check for this by comparing against the original
236 238 # renamed() implementation.
237 239 if different:
238 240 if super(narrowfilelog, self).renamed(node):
239 241 t2 = self.read(node)
240 242 return t2 != text
241 243
242 244 return different
@@ -1,1237 +1,1240
1 1 # hg.py - repository classes for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 # Copyright 2006 Vadim Gelfer <vadim.gelfer@gmail.com>
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8
9 9 from __future__ import absolute_import
10 10
11 11 import errno
12 12 import hashlib
13 13 import os
14 14 import shutil
15 15 import stat
16 16
17 17 from .i18n import _
18 18 from .node import (
19 19 nullid,
20 20 )
21 21
22 22 from . import (
23 23 bookmarks,
24 24 bundlerepo,
25 25 cacheutil,
26 26 cmdutil,
27 27 destutil,
28 28 discovery,
29 29 error,
30 30 exchange,
31 31 extensions,
32 32 httppeer,
33 33 localrepo,
34 34 lock,
35 35 logcmdutil,
36 36 logexchange,
37 37 merge as mergemod,
38 38 narrowspec,
39 39 node,
40 40 phases,
41 41 pycompat,
42 repository as repositorymod,
43 42 scmutil,
44 43 sshpeer,
45 44 statichttprepo,
46 45 ui as uimod,
47 46 unionrepo,
48 47 url,
49 48 util,
50 49 verify as verifymod,
51 50 vfs as vfsmod,
52 51 )
53 52
53 from .interfaces import (
54 repository as repositorymod,
55 )
56
54 57 release = lock.release
55 58
56 59 # shared features
57 60 sharedbookmarks = 'bookmarks'
58 61
59 62 def _local(path):
60 63 path = util.expandpath(util.urllocalpath(path))
61 64
62 65 try:
63 66 isfile = os.path.isfile(path)
64 67 # Python 2 raises TypeError, Python 3 ValueError.
65 68 except (TypeError, ValueError) as e:
66 69 raise error.Abort(_('invalid path %s: %s') % (
67 70 path, pycompat.bytestr(e)))
68 71
69 72 return isfile and bundlerepo or localrepo
70 73
71 74 def addbranchrevs(lrepo, other, branches, revs):
72 75 peer = other.peer() # a courtesy to callers using a localrepo for other
73 76 hashbranch, branches = branches
74 77 if not hashbranch and not branches:
75 78 x = revs or None
76 79 if revs:
77 80 y = revs[0]
78 81 else:
79 82 y = None
80 83 return x, y
81 84 if revs:
82 85 revs = list(revs)
83 86 else:
84 87 revs = []
85 88
86 89 if not peer.capable('branchmap'):
87 90 if branches:
88 91 raise error.Abort(_("remote branch lookup not supported"))
89 92 revs.append(hashbranch)
90 93 return revs, revs[0]
91 94
92 95 with peer.commandexecutor() as e:
93 96 branchmap = e.callcommand('branchmap', {}).result()
94 97
95 98 def primary(branch):
96 99 if branch == '.':
97 100 if not lrepo:
98 101 raise error.Abort(_("dirstate branch not accessible"))
99 102 branch = lrepo.dirstate.branch()
100 103 if branch in branchmap:
101 104 revs.extend(node.hex(r) for r in reversed(branchmap[branch]))
102 105 return True
103 106 else:
104 107 return False
105 108
106 109 for branch in branches:
107 110 if not primary(branch):
108 111 raise error.RepoLookupError(_("unknown branch '%s'") % branch)
109 112 if hashbranch:
110 113 if not primary(hashbranch):
111 114 revs.append(hashbranch)
112 115 return revs, revs[0]
113 116
114 117 def parseurl(path, branches=None):
115 118 '''parse url#branch, returning (url, (branch, branches))'''
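# A hedged example of the contract (hypothetical URL, not a doctest from the
# original source): parseurl(b'http://example.com/repo#stable') would return
# (b'http://example.com/repo', (b'stable', [])); the fragment becomes the
# branch and the optional ``branches`` argument defaults to an empty list.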
116 119
117 120 u = util.url(path)
118 121 branch = None
119 122 if u.fragment:
120 123 branch = u.fragment
121 124 u.fragment = None
122 125 return bytes(u), (branch, branches or [])
123 126
124 127 schemes = {
125 128 'bundle': bundlerepo,
126 129 'union': unionrepo,
127 130 'file': _local,
128 131 'http': httppeer,
129 132 'https': httppeer,
130 133 'ssh': sshpeer,
131 134 'static-http': statichttprepo,
132 135 }
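# Dispatch sketch (no new behaviour): _peerlookup(b'ssh://host/repo') resolves
# to the sshpeer module via this table, while a path with no recognized scheme
# falls back to 'file', i.e. _local().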
133 136
134 137 def _peerlookup(path):
135 138 u = util.url(path)
136 139 scheme = u.scheme or 'file'
137 140 thing = schemes.get(scheme) or schemes['file']
138 141 try:
139 142 return thing(path)
140 143 except TypeError:
141 144 # we can't test callable(thing) because 'thing' can be an unloaded
142 145 # module that implements __call__
143 146 if not util.safehasattr(thing, 'instance'):
144 147 raise
145 148 return thing
146 149
147 150 def islocal(repo):
148 151 '''return true if repo (or path pointing to repo) is local'''
149 152 if isinstance(repo, bytes):
150 153 try:
151 154 return _peerlookup(repo).islocal(repo)
152 155 except AttributeError:
153 156 return False
154 157 return repo.local()
155 158
156 159 def openpath(ui, path, sendaccept=True):
157 160 '''open path with open if local, url.open if remote'''
158 161 pathurl = util.url(path, parsequery=False, parsefragment=False)
159 162 if pathurl.islocal():
160 163 return util.posixfile(pathurl.localpath(), 'rb')
161 164 else:
162 165 return url.open(ui, path, sendaccept=sendaccept)
163 166
164 167 # a list of (ui, repo) functions called for wire peer initialization
165 168 wirepeersetupfuncs = []
166 169
167 170 def _peerorrepo(ui, path, create=False, presetupfuncs=None,
168 171 intents=None, createopts=None):
169 172 """return a repository object for the specified path"""
170 173 obj = _peerlookup(path).instance(ui, path, create, intents=intents,
171 174 createopts=createopts)
172 175 ui = getattr(obj, "ui", ui)
173 176 for f in presetupfuncs or []:
174 177 f(ui, obj)
175 178 ui.log(b'extension', b'- executing reposetup hooks\n')
176 179 with util.timedcm('all reposetup') as allreposetupstats:
177 180 for name, module in extensions.extensions(ui):
178 181 ui.log(b'extension', b' - running reposetup for %s\n', name)
179 182 hook = getattr(module, 'reposetup', None)
180 183 if hook:
181 184 with util.timedcm('reposetup %r', name) as stats:
182 185 hook(ui, obj)
183 186 ui.log(b'extension', b' > reposetup for %s took %s\n',
184 187 name, stats)
185 188 ui.log(b'extension', b'> all reposetup took %s\n', allreposetupstats)
186 189 if not obj.local():
187 190 for f in wirepeersetupfuncs:
188 191 f(ui, obj)
189 192 return obj
190 193
191 194 def repository(ui, path='', create=False, presetupfuncs=None, intents=None,
192 195 createopts=None):
193 196 """return a repository object for the specified path"""
194 197 peer = _peerorrepo(ui, path, create, presetupfuncs=presetupfuncs,
195 198 intents=intents, createopts=createopts)
196 199 repo = peer.local()
197 200 if not repo:
198 201 raise error.Abort(_("repository '%s' is not local") %
199 202 (path or peer.url()))
200 203 return repo.filtered('visible')
201 204
202 205 def peer(uiorrepo, opts, path, create=False, intents=None, createopts=None):
203 206 '''return a repository peer for the specified path'''
204 207 rui = remoteui(uiorrepo, opts)
205 208 return _peerorrepo(rui, path, create, intents=intents,
206 209 createopts=createopts).peer()
207 210
208 211 def defaultdest(source):
209 212 '''return default destination of clone if none is given
210 213
211 214 >>> defaultdest(b'foo')
212 215 'foo'
213 216 >>> defaultdest(b'/foo/bar')
214 217 'bar'
215 218 >>> defaultdest(b'/')
216 219 ''
217 220 >>> defaultdest(b'')
218 221 ''
219 222 >>> defaultdest(b'http://example.org/')
220 223 ''
221 224 >>> defaultdest(b'http://example.org/foo/')
222 225 'foo'
223 226 '''
224 227 path = util.url(source).path
225 228 if not path:
226 229 return ''
227 230 return os.path.basename(os.path.normpath(path))
228 231
229 232 def sharedreposource(repo):
230 233 """Returns repository object for source repository of a shared repo.
231 234
232 235 If repo is not a shared repository, returns None.
233 236 """
234 237 if repo.sharedpath == repo.path:
235 238 return None
236 239
237 240 if util.safehasattr(repo, 'srcrepo') and repo.srcrepo:
238 241 return repo.srcrepo
239 242
240 243 # the sharedpath always ends in the .hg directory; we want the path to the repo
241 244 source = repo.vfs.split(repo.sharedpath)[0]
242 245 srcurl, branches = parseurl(source)
243 246 srcrepo = repository(repo.ui, srcurl)
244 247 repo.srcrepo = srcrepo
245 248 return srcrepo
246 249
247 250 def share(ui, source, dest=None, update=True, bookmarks=True, defaultpath=None,
248 251 relative=False):
249 252 '''create a shared repository'''
250 253
251 254 if not islocal(source):
252 255 raise error.Abort(_('can only share local repositories'))
253 256
254 257 if not dest:
255 258 dest = defaultdest(source)
256 259 else:
257 260 dest = ui.expandpath(dest)
258 261
259 262 if isinstance(source, bytes):
260 263 origsource = ui.expandpath(source)
261 264 source, branches = parseurl(origsource)
262 265 srcrepo = repository(ui, source)
263 266 rev, checkout = addbranchrevs(srcrepo, srcrepo, branches, None)
264 267 else:
265 268 srcrepo = source.local()
266 269 checkout = None
267 270
268 271 shareditems = set()
269 272 if bookmarks:
270 273 shareditems.add(sharedbookmarks)
271 274
272 275 r = repository(ui, dest, create=True, createopts={
273 276 'sharedrepo': srcrepo,
274 277 'sharedrelative': relative,
275 278 'shareditems': shareditems,
276 279 })
277 280
278 281 postshare(srcrepo, r, defaultpath=defaultpath)
279 282 r = repository(ui, dest)
280 283 _postshareupdate(r, update, checkout=checkout)
281 284 return r
282 285
283 286 def unshare(ui, repo):
284 287 """convert a shared repository to a normal one
285 288
286 289 Copy the store data to the repo and remove the sharedpath data.
287 290
288 291 Returns a new repository object representing the unshared repository.
289 292
290 293 The passed repository object is not usable after this function is
291 294 called.
292 295 """
293 296
294 297 with repo.lock():
295 298 # we use locks here because if we race with commit, we
296 299 # can end up with extra data in the cloned revlogs that's
297 300 # not pointed to by changesets, thus causing verify to
298 301 # fail
299 302 destlock = copystore(ui, repo, repo.path)
300 303 with destlock or util.nullcontextmanager():
301 304
302 305 sharefile = repo.vfs.join('sharedpath')
303 306 util.rename(sharefile, sharefile + '.old')
304 307
305 308 repo.requirements.discard('shared')
306 309 repo.requirements.discard('relshared')
307 310 repo._writerequirements()
308 311
309 312 # Removing share changes some fundamental properties of the repo instance.
310 313 # So we instantiate a new repo object and operate on it rather than
311 314 # try to keep the existing repo usable.
312 315 newrepo = repository(repo.baseui, repo.root, create=False)
313 316
314 317 # TODO: figure out how to access subrepos that exist, but were previously
315 318 # removed from .hgsub
316 319 c = newrepo['.']
317 320 subs = c.substate
318 321 for s in sorted(subs):
319 322 c.sub(s).unshare()
320 323
321 324 localrepo.poisonrepository(repo)
322 325
323 326 return newrepo
324 327
325 328 def postshare(sourcerepo, destrepo, defaultpath=None):
326 329 """Called after a new shared repo is created.
327 330
328 331 The new repo only has a requirements file and pointer to the source.
329 332 This function configures additional shared data.
330 333
331 334 Extensions can wrap this function and write additional entries to
332 335 destrepo/.hg/shared to indicate additional pieces of data to be shared.
333 336 """
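# A hedged sketch of how an extension might hook in (names are illustrative,
# not from the original source):
#   extensions.wrapfunction(hg, 'postshare', mypostshare)
# where mypostshare() calls the original implementation and then appends its
# own entry to destrepo/.hg/shared.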
334 337 default = defaultpath or sourcerepo.ui.config('paths', 'default')
335 338 if default:
336 339 template = ('[paths]\n'
337 340 'default = %s\n')
338 341 destrepo.vfs.write('hgrc', util.tonativeeol(template % default))
339 342 if repositorymod.NARROW_REQUIREMENT in sourcerepo.requirements:
340 343 with destrepo.wlock():
341 344 narrowspec.copytoworkingcopy(destrepo)
342 345
343 346 def _postshareupdate(repo, update, checkout=None):
344 347 """Maybe perform a working directory update after a shared repo is created.
345 348
346 349 ``update`` can be a boolean or a revision to update to.
347 350 """
348 351 if not update:
349 352 return
350 353
351 354 repo.ui.status(_("updating working directory\n"))
352 355 if update is not True:
353 356 checkout = update
354 357 for test in (checkout, 'default', 'tip'):
355 358 if test is None:
356 359 continue
357 360 try:
358 361 uprev = repo.lookup(test)
359 362 break
360 363 except error.RepoLookupError:
361 364 continue
362 365 _update(repo, uprev)
363 366
364 367 def copystore(ui, srcrepo, destpath):
365 368 '''copy files from store of srcrepo in destpath
366 369
367 370 returns destlock
368 371 '''
369 372 destlock = None
370 373 try:
371 374 hardlink = None
372 375 topic = _('linking') if hardlink else _('copying')
373 376 with ui.makeprogress(topic, unit=_('files')) as progress:
374 377 num = 0
375 378 srcpublishing = srcrepo.publishing()
376 379 srcvfs = vfsmod.vfs(srcrepo.sharedpath)
377 380 dstvfs = vfsmod.vfs(destpath)
378 381 for f in srcrepo.store.copylist():
379 382 if srcpublishing and f.endswith('phaseroots'):
380 383 continue
381 384 dstbase = os.path.dirname(f)
382 385 if dstbase and not dstvfs.exists(dstbase):
383 386 dstvfs.mkdir(dstbase)
384 387 if srcvfs.exists(f):
385 388 if f.endswith('data'):
386 389 # 'dstbase' may be empty (e.g. revlog format 0)
387 390 lockfile = os.path.join(dstbase, "lock")
388 391 # lock to avoid premature writing to the target
389 392 destlock = lock.lock(dstvfs, lockfile)
390 393 hardlink, n = util.copyfiles(srcvfs.join(f), dstvfs.join(f),
391 394 hardlink, progress)
392 395 num += n
393 396 if hardlink:
394 397 ui.debug("linked %d files\n" % num)
395 398 else:
396 399 ui.debug("copied %d files\n" % num)
397 400 return destlock
398 401 except: # re-raises
399 402 release(destlock)
400 403 raise
401 404
402 405 def clonewithshare(ui, peeropts, sharepath, source, srcpeer, dest, pull=False,
403 406 rev=None, update=True, stream=False):
404 407 """Perform a clone using a shared repo.
405 408
406 409 The store for the repository will be located at <sharepath>/.hg. The
407 410 specified revisions will be cloned or pulled from "source". A shared repo
408 411 will be created at "dest" and a working copy will be created if "update" is
409 412 True.
410 413 """
411 414 revs = None
412 415 if rev:
413 416 if not srcpeer.capable('lookup'):
414 417 raise error.Abort(_("src repository does not support "
415 418 "revision lookup and so doesn't "
416 419 "support clone by revision"))
417 420
418 421 # TODO this is batchable.
419 422 remoterevs = []
420 423 for r in rev:
421 424 with srcpeer.commandexecutor() as e:
422 425 remoterevs.append(e.callcommand('lookup', {
423 426 'key': r,
424 427 }).result())
425 428 revs = remoterevs
426 429
427 430 # Obtain a lock before checking for or cloning the pooled repo otherwise
428 431 # 2 clients may race creating or populating it.
429 432 pooldir = os.path.dirname(sharepath)
430 433 # lock class requires the directory to exist.
431 434 try:
432 435 util.makedir(pooldir, False)
433 436 except OSError as e:
434 437 if e.errno != errno.EEXIST:
435 438 raise
436 439
437 440 poolvfs = vfsmod.vfs(pooldir)
438 441 basename = os.path.basename(sharepath)
439 442
440 443 with lock.lock(poolvfs, '%s.lock' % basename):
441 444 if os.path.exists(sharepath):
442 445 ui.status(_('(sharing from existing pooled repository %s)\n') %
443 446 basename)
444 447 else:
445 448 ui.status(_('(sharing from new pooled repository %s)\n') % basename)
446 449 # Always use pull mode because hardlinks in share mode don't work
447 450 # well. Never update because working copies aren't necessary in
448 451 # share mode.
449 452 clone(ui, peeropts, source, dest=sharepath, pull=True,
450 453 revs=rev, update=False, stream=stream)
451 454
452 455 # Resolve the value to put in [paths] section for the source.
453 456 if islocal(source):
454 457 defaultpath = os.path.abspath(util.urllocalpath(source))
455 458 else:
456 459 defaultpath = source
457 460
458 461 sharerepo = repository(ui, path=sharepath)
459 462 destrepo = share(ui, sharerepo, dest=dest, update=False, bookmarks=False,
460 463 defaultpath=defaultpath)
461 464
462 465 # We need to perform a pull against the dest repo to fetch bookmarks
463 466 # and other non-store data that isn't shared by default. In the case of
464 467 # non-existing shared repo, this means we pull from the remote twice. This
465 468 # is a bit weird. But at the time it was implemented, there wasn't an easy
466 469 # way to pull just non-changegroup data.
467 470 exchange.pull(destrepo, srcpeer, heads=revs)
468 471
469 472 _postshareupdate(destrepo, update)
470 473
471 474 return srcpeer, peer(ui, peeropts, dest)
472 475
473 476 # Recomputing branch cache might be slow on big repos,
474 477 # so just copy it
475 478 def _copycache(srcrepo, dstcachedir, fname):
476 479 """copy a cache from srcrepo to destcachedir (if it exists)"""
477 480 srcbranchcache = srcrepo.vfs.join('cache/%s' % fname)
478 481 dstbranchcache = os.path.join(dstcachedir, fname)
479 482 if os.path.exists(srcbranchcache):
480 483 if not os.path.exists(dstcachedir):
481 484 os.mkdir(dstcachedir)
482 485 util.copyfile(srcbranchcache, dstbranchcache)
483 486
484 487 def clone(ui, peeropts, source, dest=None, pull=False, revs=None,
485 488 update=True, stream=False, branch=None, shareopts=None,
486 489 storeincludepats=None, storeexcludepats=None, depth=None):
487 490 """Make a copy of an existing repository.
488 491
489 492 Create a copy of an existing repository in a new directory. The
490 493 source and destination are URLs, as passed to the repository
491 494 function. Returns a pair of repository peers, the source and
492 495 newly created destination.
493 496
494 497 The location of the source is added to the new repository's
495 498 .hg/hgrc file, as the default to be used for future pulls and
496 499 pushes.
497 500
498 501 If an exception is raised, the partly cloned/updated destination
499 502 repository will be deleted.
500 503
501 504 Arguments:
502 505
503 506 source: repository object or URL
504 507
505 508 dest: URL of destination repository to create (defaults to base
506 509 name of source repository)
507 510
508 511 pull: always pull from source repository, even in local case or if the
509 512 server prefers streaming
510 513
511 514 stream: stream raw data uncompressed from repository (fast over
512 515 LAN, slow over WAN)
513 516
514 517 revs: revision to clone up to (implies pull=True)
515 518
516 519 update: update working directory after clone completes, if
517 520 destination is local repository (True means update to default rev,
518 521 anything else is treated as a revision)
519 522
520 523 branch: branches to clone
521 524
522 525 shareopts: dict of options to control auto sharing behavior. The "pool" key
523 526 activates auto sharing mode and defines the directory for stores. The
524 527 "mode" key determines how to construct the directory name of the shared
525 528 repository. "identity" means the name is derived from the node of the first
526 529 changeset in the repository. "remote" means the name is derived from the
527 530 remote's path/URL. Defaults to "identity."
528 531
529 532 storeincludepats and storeexcludepats: sets of file patterns to include and
530 533 exclude in the repository copy, respectively. If not defined, all files
531 534 will be included (a "full" clone). Otherwise a "narrow" clone containing
532 535 only the requested files will be performed. If ``storeincludepats`` is not
533 536 defined but ``storeexcludepats`` is, ``storeincludepats`` is assumed to be
534 537 ``path:.``. If both are empty sets, no files will be cloned.
535 538 """
536 539
537 540 if isinstance(source, bytes):
538 541 origsource = ui.expandpath(source)
539 542 source, branches = parseurl(origsource, branch)
540 543 srcpeer = peer(ui, peeropts, source)
541 544 else:
542 545 srcpeer = source.peer() # in case we were called with a localrepo
543 546 branches = (None, branch or [])
544 547 origsource = source = srcpeer.url()
545 548 revs, checkout = addbranchrevs(srcpeer, srcpeer, branches, revs)
546 549
547 550 if dest is None:
548 551 dest = defaultdest(source)
549 552 if dest:
550 553 ui.status(_("destination directory: %s\n") % dest)
551 554 else:
552 555 dest = ui.expandpath(dest)
553 556
554 557 dest = util.urllocalpath(dest)
555 558 source = util.urllocalpath(source)
556 559
557 560 if not dest:
558 561 raise error.Abort(_("empty destination path is not valid"))
559 562
560 563 destvfs = vfsmod.vfs(dest, expandpath=True)
561 564 if destvfs.lexists():
562 565 if not destvfs.isdir():
563 566 raise error.Abort(_("destination '%s' already exists") % dest)
564 567 elif destvfs.listdir():
565 568 raise error.Abort(_("destination '%s' is not empty") % dest)
566 569
567 570 createopts = {}
568 571 narrow = False
569 572
570 573 if storeincludepats is not None:
571 574 narrowspec.validatepatterns(storeincludepats)
572 575 narrow = True
573 576
574 577 if storeexcludepats is not None:
575 578 narrowspec.validatepatterns(storeexcludepats)
576 579 narrow = True
577 580
578 581 if narrow:
579 582 # Include everything by default if only exclusion patterns defined.
580 583 if storeexcludepats and not storeincludepats:
581 584 storeincludepats = {'path:.'}
582 585
583 586 createopts['narrowfiles'] = True
584 587
585 588 if depth:
586 589 createopts['shallowfilestore'] = True
587 590
588 591 if srcpeer.capable(b'lfs-serve'):
589 592 # Repository creation honors the config if it disabled the extension, so
590 593 # we can't just announce that lfs will be enabled. This check avoids
591 594 # saying that lfs will be enabled, and then saying it's an unknown
592 595 # feature. The lfs creation option is set in either case so that a
593 596 # requirement is added. If the extension is explicitly disabled but the
594 597 # requirement is set, the clone aborts early, before transferring any
595 598 # data.
596 599 createopts['lfs'] = True
597 600
598 601 if extensions.disabledext('lfs'):
599 602 ui.status(_('(remote is using large file support (lfs), but it is '
600 603 'explicitly disabled in the local configuration)\n'))
601 604 else:
602 605 ui.status(_('(remote is using large file support (lfs); lfs will '
603 606 'be enabled for this repository)\n'))
604 607
605 608 shareopts = shareopts or {}
606 609 sharepool = shareopts.get('pool')
607 610 sharenamemode = shareopts.get('mode')
608 611 if sharepool and islocal(dest):
609 612 sharepath = None
610 613 if sharenamemode == 'identity':
611 614 # Resolve the name from the initial changeset in the remote
612 615 # repository. This returns nullid when the remote is empty. It
613 616 # raises RepoLookupError if revision 0 is filtered or otherwise
614 617 # not available. If we fail to resolve, sharing is not enabled.
615 618 try:
616 619 with srcpeer.commandexecutor() as e:
617 620 rootnode = e.callcommand('lookup', {
618 621 'key': '0',
619 622 }).result()
620 623
621 624 if rootnode != node.nullid:
622 625 sharepath = os.path.join(sharepool, node.hex(rootnode))
623 626 else:
624 627 ui.status(_('(not using pooled storage: '
625 628 'remote appears to be empty)\n'))
626 629 except error.RepoLookupError:
627 630 ui.status(_('(not using pooled storage: '
628 631 'unable to resolve identity of remote)\n'))
629 632 elif sharenamemode == 'remote':
630 633 sharepath = os.path.join(
631 634 sharepool, node.hex(hashlib.sha1(source).digest()))
632 635 else:
633 636 raise error.Abort(_('unknown share naming mode: %s') %
634 637 sharenamemode)
635 638
636 639 # TODO this is a somewhat arbitrary restriction.
637 640 if narrow:
638 641 ui.status(_('(pooled storage not supported for narrow clones)\n'))
639 642 sharepath = None
640 643
641 644 if sharepath:
642 645 return clonewithshare(ui, peeropts, sharepath, source, srcpeer,
643 646 dest, pull=pull, rev=revs, update=update,
644 647 stream=stream)
645 648
646 649 srclock = destlock = cleandir = None
647 650 srcrepo = srcpeer.local()
648 651 try:
649 652 abspath = origsource
650 653 if islocal(origsource):
651 654 abspath = os.path.abspath(util.urllocalpath(origsource))
652 655
653 656 if islocal(dest):
654 657 cleandir = dest
655 658
656 659 copy = False
657 660 if (srcrepo and srcrepo.cancopy() and islocal(dest)
658 661 and not phases.hassecret(srcrepo)):
659 662 copy = not pull and not revs
660 663
661 664 # TODO this is a somewhat arbitrary restriction.
662 665 if narrow:
663 666 copy = False
664 667
665 668 if copy:
666 669 try:
667 670 # we use a lock here because if we race with commit, we
668 671 # can end up with extra data in the cloned revlogs that's
669 672 # not pointed to by changesets, thus causing verify to
670 673 # fail
671 674 srclock = srcrepo.lock(wait=False)
672 675 except error.LockError:
673 676 copy = False
674 677
675 678 if copy:
676 679 srcrepo.hook('preoutgoing', throw=True, source='clone')
677 680 hgdir = os.path.realpath(os.path.join(dest, ".hg"))
678 681 if not os.path.exists(dest):
679 682 util.makedirs(dest)
680 683 else:
681 684 # only clean up directories we create ourselves
682 685 cleandir = hgdir
683 686 try:
684 687 destpath = hgdir
685 688 util.makedir(destpath, notindexed=True)
686 689 except OSError as inst:
687 690 if inst.errno == errno.EEXIST:
688 691 cleandir = None
689 692 raise error.Abort(_("destination '%s' already exists")
690 693 % dest)
691 694 raise
692 695
693 696 destlock = copystore(ui, srcrepo, destpath)
694 697 # copy bookmarks over
695 698 srcbookmarks = srcrepo.vfs.join('bookmarks')
696 699 dstbookmarks = os.path.join(destpath, 'bookmarks')
697 700 if os.path.exists(srcbookmarks):
698 701 util.copyfile(srcbookmarks, dstbookmarks)
699 702
700 703 dstcachedir = os.path.join(destpath, 'cache')
701 704 for cache in cacheutil.cachetocopy(srcrepo):
702 705 _copycache(srcrepo, dstcachedir, cache)
703 706
704 707 # we need to re-init the repo after manually copying the data
705 708 # into it
706 709 destpeer = peer(srcrepo, peeropts, dest)
707 710 srcrepo.hook('outgoing', source='clone',
708 711 node=node.hex(node.nullid))
709 712 else:
710 713 try:
711 714 # only pass ui when no srcrepo
712 715 destpeer = peer(srcrepo or ui, peeropts, dest, create=True,
713 716 createopts=createopts)
714 717 except OSError as inst:
715 718 if inst.errno == errno.EEXIST:
716 719 cleandir = None
717 720 raise error.Abort(_("destination '%s' already exists")
718 721 % dest)
719 722 raise
720 723
721 724 if revs:
722 725 if not srcpeer.capable('lookup'):
723 726 raise error.Abort(_("src repository does not support "
724 727 "revision lookup and so doesn't "
725 728 "support clone by revision"))
726 729
727 730 # TODO this is batchable.
728 731 remoterevs = []
729 732 for rev in revs:
730 733 with srcpeer.commandexecutor() as e:
731 734 remoterevs.append(e.callcommand('lookup', {
732 735 'key': rev,
733 736 }).result())
734 737 revs = remoterevs
735 738
736 739 checkout = revs[0]
737 740 else:
738 741 revs = None
739 742 local = destpeer.local()
740 743 if local:
741 744 if narrow:
742 745 with local.wlock(), local.lock():
743 746 local.setnarrowpats(storeincludepats, storeexcludepats)
744 747 narrowspec.copytoworkingcopy(local)
745 748
746 749 u = util.url(abspath)
747 750 defaulturl = bytes(u)
748 751 local.ui.setconfig('paths', 'default', defaulturl, 'clone')
749 752 if not stream:
750 753 if pull:
751 754 stream = False
752 755 else:
753 756 stream = None
754 757 # internal config: ui.quietbookmarkmove
755 758 overrides = {('ui', 'quietbookmarkmove'): True}
756 759 with local.ui.configoverride(overrides, 'clone'):
757 760 exchange.pull(local, srcpeer, revs,
758 761 streamclonerequested=stream,
759 762 includepats=storeincludepats,
760 763 excludepats=storeexcludepats,
761 764 depth=depth)
762 765 elif srcrepo:
763 766 # TODO lift restriction once exchange.push() accepts narrow
764 767 # push.
765 768 if narrow:
766 769 raise error.Abort(_('narrow clone not available for '
767 770 'remote destinations'))
768 771
769 772 exchange.push(srcrepo, destpeer, revs=revs,
770 773 bookmarks=srcrepo._bookmarks.keys())
771 774 else:
772 775 raise error.Abort(_("clone from remote to remote not supported")
773 776 )
774 777
775 778 cleandir = None
776 779
777 780 destrepo = destpeer.local()
778 781 if destrepo:
779 782 template = uimod.samplehgrcs['cloned']
780 783 u = util.url(abspath)
781 784 u.passwd = None
782 785 defaulturl = bytes(u)
783 786 destrepo.vfs.write('hgrc', util.tonativeeol(template % defaulturl))
784 787 destrepo.ui.setconfig('paths', 'default', defaulturl, 'clone')
785 788
786 789 if ui.configbool('experimental', 'remotenames'):
787 790 logexchange.pullremotenames(destrepo, srcpeer)
788 791
789 792 if update:
790 793 if update is not True:
791 794 with srcpeer.commandexecutor() as e:
792 795 checkout = e.callcommand('lookup', {
793 796 'key': update,
794 797 }).result()
795 798
796 799 uprev = None
797 800 status = None
798 801 if checkout is not None:
799 802 # Some extensions (at least hg-git and hg-subversion) have
800 803 # a peer.lookup() implementation that returns a name instead
801 804 # of a nodeid. We work around it here until we've figured
802 805 # out a better solution.
803 806 if len(checkout) == 20 and checkout in destrepo:
804 807 uprev = checkout
805 808 elif scmutil.isrevsymbol(destrepo, checkout):
806 809 uprev = scmutil.revsymbol(destrepo, checkout).node()
807 810 else:
808 811 if update is not True:
809 812 try:
810 813 uprev = destrepo.lookup(update)
811 814 except error.RepoLookupError:
812 815 pass
813 816 if uprev is None:
814 817 try:
815 818 uprev = destrepo._bookmarks['@']
816 819 update = '@'
817 820 bn = destrepo[uprev].branch()
818 821 if bn == 'default':
819 822 status = _("updating to bookmark @\n")
820 823 else:
821 824 status = (_("updating to bookmark @ on branch %s\n")
822 825 % bn)
823 826 except KeyError:
824 827 try:
825 828 uprev = destrepo.branchtip('default')
826 829 except error.RepoLookupError:
827 830 uprev = destrepo.lookup('tip')
828 831 if not status:
829 832 bn = destrepo[uprev].branch()
830 833 status = _("updating to branch %s\n") % bn
831 834 destrepo.ui.status(status)
832 835 _update(destrepo, uprev)
833 836 if update in destrepo._bookmarks:
834 837 bookmarks.activate(destrepo, update)
835 838 finally:
836 839 release(srclock, destlock)
837 840 if cleandir is not None:
838 841 shutil.rmtree(cleandir, True)
839 842 if srcpeer is not None:
840 843 srcpeer.close()
841 844 return srcpeer, destpeer
842 845
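The pooled-share path above is driven entirely by the ``shareopts`` dict. A minimal
illustrative sketch of a caller enabling it (the URL, destination and pool directory
are hypothetical, and the empty dict stands in for peer options):

    from mercurial import hg, ui as uimod

    myui = uimod.ui.load()
    # "identity" names the shared store after the root changeset;
    # "remote" would hash the source URL instead.
    srcpeer, destpeer = hg.clone(
        myui, {}, b'https://example.com/repo', dest=b'local-copy',
        shareopts={b'pool': b'/srv/hg-share-pool', b'mode': b'identity'})
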
843 846 def _showstats(repo, stats, quietempty=False):
844 847 if quietempty and stats.isempty():
845 848 return
846 849 repo.ui.status(_("%d files updated, %d files merged, "
847 850 "%d files removed, %d files unresolved\n") % (
848 851 stats.updatedcount, stats.mergedcount,
849 852 stats.removedcount, stats.unresolvedcount))
850 853
851 854 def updaterepo(repo, node, overwrite, updatecheck=None):
852 855 """Update the working directory to node.
853 856
854 857     When overwrite is set, changes are clobbered; otherwise they are merged
855 858
856 859 returns stats (see pydoc mercurial.merge.applyupdates)"""
857 860 return mergemod.update(repo, node, branchmerge=False, force=overwrite,
858 861 labels=['working copy', 'destination'],
859 862 updatecheck=updatecheck)
860 863
861 864 def update(repo, node, quietempty=False, updatecheck=None):
862 865 """update the working directory to node"""
863 866 stats = updaterepo(repo, node, False, updatecheck=updatecheck)
864 867 _showstats(repo, stats, quietempty)
865 868 if stats.unresolvedcount:
866 869 repo.ui.status(_("use 'hg resolve' to retry unresolved file merges\n"))
867 870 return stats.unresolvedcount > 0
868 871
869 872 # naming conflict in clone()
870 873 _update = update
871 874
872 875 def clean(repo, node, show_stats=True, quietempty=False):
873 876 """forcibly switch the working directory to node, clobbering changes"""
874 877 stats = updaterepo(repo, node, True)
875 878 repo.vfs.unlinkpath('graftstate', ignoremissing=True)
876 879 if show_stats:
877 880 _showstats(repo, stats, quietempty)
878 881 return stats.unresolvedcount > 0
879 882
880 883 # naming conflict in updatetotally()
881 884 _clean = clean
882 885
883 886 def updatetotally(ui, repo, checkout, brev, clean=False, updatecheck=None):
884 887 """Update the working directory with extra care for non-file components
885 888
886 889 This takes care of non-file components below:
887 890
888 891 :bookmark: might be advanced or (in)activated
889 892
890 893 This takes arguments below:
891 894
892 895 :checkout: to which revision the working directory is updated
893 896 :brev: a name, which might be a bookmark to be activated after updating
894 897 :clean: whether changes in the working directory can be discarded
895 898 :updatecheck: how to deal with a dirty working directory
896 899
897 900 Valid values for updatecheck are (None => linear):
898 901
899 902 * abort: abort if the working directory is dirty
900 903 * none: don't check (merge working directory changes into destination)
901 904 * linear: check that update is linear before merging working directory
902 905 changes into destination
903 906 * noconflict: check that the update does not result in file merges
904 907
905 908     This returns whether a conflict is detected during the update.
906 909 """
907 910 if updatecheck is None:
908 911 updatecheck = ui.config('commands', 'update.check')
909 912 if updatecheck not in ('abort', 'none', 'linear', 'noconflict'):
910 913 # If not configured, or invalid value configured
911 914 updatecheck = 'linear'
912 915 with repo.wlock():
913 916 movemarkfrom = None
914 917 warndest = False
915 918 if checkout is None:
916 919 updata = destutil.destupdate(repo, clean=clean)
917 920 checkout, movemarkfrom, brev = updata
918 921 warndest = True
919 922
920 923 if clean:
921 924 ret = _clean(repo, checkout)
922 925 else:
923 926 if updatecheck == 'abort':
924 927 cmdutil.bailifchanged(repo, merge=False)
925 928 updatecheck = 'none'
926 929 ret = _update(repo, checkout, updatecheck=updatecheck)
927 930
928 931 if not ret and movemarkfrom:
929 932 if movemarkfrom == repo['.'].node():
930 933 pass # no-op update
931 934 elif bookmarks.update(repo, [movemarkfrom], repo['.'].node()):
932 935 b = ui.label(repo._activebookmark, 'bookmarks.active')
933 936 ui.status(_("updating bookmark %s\n") % b)
934 937 else:
935 938 # this can happen with a non-linear update
936 939 b = ui.label(repo._activebookmark, 'bookmarks')
937 940 ui.status(_("(leaving bookmark %s)\n") % b)
938 941 bookmarks.deactivate(repo)
939 942 elif brev in repo._bookmarks:
940 943 if brev != repo._activebookmark:
941 944 b = ui.label(brev, 'bookmarks.active')
942 945 ui.status(_("(activating bookmark %s)\n") % b)
943 946 bookmarks.activate(repo, brev)
944 947 elif brev:
945 948 if repo._activebookmark:
946 949 b = ui.label(repo._activebookmark, 'bookmarks')
947 950 ui.status(_("(leaving bookmark %s)\n") % b)
948 951 bookmarks.deactivate(repo)
949 952
950 953 if warndest:
951 954 destutil.statusotherdests(ui, repo)
952 955
953 956 return ret
954 957
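The updatecheck fallback above (anything other than abort/none/linear/noconflict
collapses to linear) can be summarized in a small standalone sketch; the helper
name is made up and only mirrors the validation in updatetotally():

    def _resolve_updatecheck(configured):
        # mirrors updatetotally(): unknown or unset values fall back to linear
        if configured not in ('abort', 'none', 'linear', 'noconflict'):
            return 'linear'
        return configured

    assert _resolve_updatecheck(None) == 'linear'
    assert _resolve_updatecheck('bogus') == 'linear'
    assert _resolve_updatecheck('noconflict') == 'noconflict'
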
955 958 def merge(repo, node, force=None, remind=True, mergeforce=False, labels=None,
956 959 abort=False):
957 960 """Branch merge with node, resolving changes. Return true if any
958 961 unresolved conflicts."""
959 962 if abort:
960 963 return abortmerge(repo.ui, repo)
961 964
962 965 stats = mergemod.update(repo, node, branchmerge=True, force=force,
963 966 mergeforce=mergeforce, labels=labels)
964 967 _showstats(repo, stats)
965 968 if stats.unresolvedcount:
966 969 repo.ui.status(_("use 'hg resolve' to retry unresolved file merges "
967 970 "or 'hg merge --abort' to abandon\n"))
968 971 elif remind:
969 972 repo.ui.status(_("(branch merge, don't forget to commit)\n"))
970 973 return stats.unresolvedcount > 0
971 974
972 975 def abortmerge(ui, repo):
973 976 ms = mergemod.mergestate.read(repo)
974 977 if ms.active():
975 978 # there were conflicts
976 979 node = ms.localctx.hex()
977 980 else:
978 981         # there were no conflicts, mergestate was not stored
979 982 node = repo['.'].hex()
980 983
981 984 repo.ui.status(_("aborting the merge, updating back to"
982 985 " %s\n") % node[:12])
983 986 stats = mergemod.update(repo, node, branchmerge=False, force=True)
984 987 _showstats(repo, stats)
985 988 return stats.unresolvedcount > 0
986 989
987 990 def _incoming(displaychlist, subreporecurse, ui, repo, source,
988 991 opts, buffered=False):
989 992 """
990 993 Helper for incoming / gincoming.
991 994 displaychlist gets called with
992 995 (remoterepo, incomingchangesetlist, displayer) parameters,
993 996 and is supposed to contain only code that can't be unified.
994 997 """
995 998 source, branches = parseurl(ui.expandpath(source), opts.get('branch'))
996 999 other = peer(repo, opts, source)
997 1000 ui.status(_('comparing with %s\n') % util.hidepassword(source))
998 1001 revs, checkout = addbranchrevs(repo, other, branches, opts.get('rev'))
999 1002
1000 1003 if revs:
1001 1004 revs = [other.lookup(rev) for rev in revs]
1002 1005 other, chlist, cleanupfn = bundlerepo.getremotechanges(ui, repo, other,
1003 1006 revs, opts["bundle"], opts["force"])
1004 1007 try:
1005 1008 if not chlist:
1006 1009 ui.status(_("no changes found\n"))
1007 1010 return subreporecurse()
1008 1011 ui.pager('incoming')
1009 1012 displayer = logcmdutil.changesetdisplayer(ui, other, opts,
1010 1013 buffered=buffered)
1011 1014 displaychlist(other, chlist, displayer)
1012 1015 displayer.close()
1013 1016 finally:
1014 1017 cleanupfn()
1015 1018 subreporecurse()
1016 1019 return 0 # exit code is zero since we found incoming changes
1017 1020
1018 1021 def incoming(ui, repo, source, opts):
1019 1022 def subreporecurse():
1020 1023 ret = 1
1021 1024 if opts.get('subrepos'):
1022 1025 ctx = repo[None]
1023 1026 for subpath in sorted(ctx.substate):
1024 1027 sub = ctx.sub(subpath)
1025 1028 ret = min(ret, sub.incoming(ui, source, opts))
1026 1029 return ret
1027 1030
1028 1031 def display(other, chlist, displayer):
1029 1032 limit = logcmdutil.getlimit(opts)
1030 1033 if opts.get('newest_first'):
1031 1034 chlist.reverse()
1032 1035 count = 0
1033 1036 for n in chlist:
1034 1037 if limit is not None and count >= limit:
1035 1038 break
1036 1039 parents = [p for p in other.changelog.parents(n) if p != nullid]
1037 1040 if opts.get('no_merges') and len(parents) == 2:
1038 1041 continue
1039 1042 count += 1
1040 1043 displayer.show(other[n])
1041 1044 return _incoming(display, subreporecurse, ui, repo, source, opts)
1042 1045
1043 1046 def _outgoing(ui, repo, dest, opts):
1044 1047 path = ui.paths.getpath(dest, default=('default-push', 'default'))
1045 1048 if not path:
1046 1049 raise error.Abort(_('default repository not configured!'),
1047 1050 hint=_("see 'hg help config.paths'"))
1048 1051 dest = path.pushloc or path.loc
1049 1052 branches = path.branch, opts.get('branch') or []
1050 1053
1051 1054 ui.status(_('comparing with %s\n') % util.hidepassword(dest))
1052 1055 revs, checkout = addbranchrevs(repo, repo, branches, opts.get('rev'))
1053 1056 if revs:
1054 1057 revs = [repo[rev].node() for rev in scmutil.revrange(repo, revs)]
1055 1058
1056 1059 other = peer(repo, opts, dest)
1057 1060 outgoing = discovery.findcommonoutgoing(repo, other, revs,
1058 1061 force=opts.get('force'))
1059 1062 o = outgoing.missing
1060 1063 if not o:
1061 1064 scmutil.nochangesfound(repo.ui, repo, outgoing.excluded)
1062 1065 return o, other
1063 1066
1064 1067 def outgoing(ui, repo, dest, opts):
1065 1068 def recurse():
1066 1069 ret = 1
1067 1070 if opts.get('subrepos'):
1068 1071 ctx = repo[None]
1069 1072 for subpath in sorted(ctx.substate):
1070 1073 sub = ctx.sub(subpath)
1071 1074 ret = min(ret, sub.outgoing(ui, dest, opts))
1072 1075 return ret
1073 1076
1074 1077 limit = logcmdutil.getlimit(opts)
1075 1078 o, other = _outgoing(ui, repo, dest, opts)
1076 1079 if not o:
1077 1080 cmdutil.outgoinghooks(ui, repo, other, opts, o)
1078 1081 return recurse()
1079 1082
1080 1083 if opts.get('newest_first'):
1081 1084 o.reverse()
1082 1085 ui.pager('outgoing')
1083 1086 displayer = logcmdutil.changesetdisplayer(ui, repo, opts)
1084 1087 count = 0
1085 1088 for n in o:
1086 1089 if limit is not None and count >= limit:
1087 1090 break
1088 1091 parents = [p for p in repo.changelog.parents(n) if p != nullid]
1089 1092 if opts.get('no_merges') and len(parents) == 2:
1090 1093 continue
1091 1094 count += 1
1092 1095 displayer.show(repo[n])
1093 1096 displayer.close()
1094 1097 cmdutil.outgoinghooks(ui, repo, other, opts, o)
1095 1098 recurse()
1096 1099 return 0 # exit code is zero since we found outgoing changes
1097 1100
1098 1101 def verify(repo, level=None):
1099 1102 """verify the consistency of a repository"""
1100 1103 ret = verifymod.verify(repo, level=level)
1101 1104
1102 1105 # Broken subrepo references in hidden csets don't seem worth worrying about,
1103 1106 # since they can't be pushed/pulled, and --hidden can be used if they are a
1104 1107 # concern.
1105 1108
1106 1109 # pathto() is needed for -R case
1107 1110 revs = repo.revs("filelog(%s)",
1108 1111 util.pathto(repo.root, repo.getcwd(), '.hgsubstate'))
1109 1112
1110 1113 if revs:
1111 1114 repo.ui.status(_('checking subrepo links\n'))
1112 1115 for rev in revs:
1113 1116 ctx = repo[rev]
1114 1117 try:
1115 1118 for subpath in ctx.substate:
1116 1119 try:
1117 1120 ret = (ctx.sub(subpath, allowcreate=False).verify()
1118 1121 or ret)
1119 1122 except error.RepoError as e:
1120 1123 repo.ui.warn(('%d: %s\n') % (rev, e))
1121 1124 except Exception:
1122 1125 repo.ui.warn(_('.hgsubstate is corrupt in revision %s\n') %
1123 1126 node.short(ctx.node()))
1124 1127
1125 1128 return ret
1126 1129
1127 1130 def remoteui(src, opts):
1128 1131 'build a remote ui from ui or repo and opts'
1129 1132 if util.safehasattr(src, 'baseui'): # looks like a repository
1130 1133 dst = src.baseui.copy() # drop repo-specific config
1131 1134 src = src.ui # copy target options from repo
1132 1135 else: # assume it's a global ui object
1133 1136 dst = src.copy() # keep all global options
1134 1137
1135 1138 # copy ssh-specific options
1136 1139 for o in 'ssh', 'remotecmd':
1137 1140 v = opts.get(o) or src.config('ui', o)
1138 1141 if v:
1139 1142 dst.setconfig("ui", o, v, 'copied')
1140 1143
1141 1144 # copy bundle-specific options
1142 1145 r = src.config('bundle', 'mainreporoot')
1143 1146 if r:
1144 1147 dst.setconfig('bundle', 'mainreporoot', r, 'copied')
1145 1148
1146 1149 # copy selected local settings to the remote ui
1147 1150 for sect in ('auth', 'hostfingerprints', 'hostsecurity', 'http_proxy'):
1148 1151 for key, val in src.configitems(sect):
1149 1152 dst.setconfig(sect, key, val, 'copied')
1150 1153 v = src.config('web', 'cacerts')
1151 1154 if v:
1152 1155 dst.setconfig('web', 'cacerts', util.expandpath(v), 'copied')
1153 1156
1154 1157 return dst
1155 1158
1156 1159 # Files of interest
1157 1160 # Used to check if the repository has changed looking at mtime and size of
1158 1161 # these files.
1159 1162 foi = [('spath', '00changelog.i'),
1160 1163 ('spath', 'phaseroots'), # ! phase can change content at the same size
1161 1164 ('spath', 'obsstore'),
1162 1165 ('path', 'bookmarks'), # ! bookmark can change content at the same size
1163 1166 ]
1164 1167
1165 1168 class cachedlocalrepo(object):
1166 1169 """Holds a localrepository that can be cached and reused."""
1167 1170
1168 1171 def __init__(self, repo):
1169 1172 """Create a new cached repo from an existing repo.
1170 1173
1171 1174 We assume the passed in repo was recently created. If the
1172 1175 repo has changed between when it was created and when it was
1173 1176 turned into a cache, it may not refresh properly.
1174 1177 """
1175 1178 assert isinstance(repo, localrepo.localrepository)
1176 1179 self._repo = repo
1177 1180 self._state, self.mtime = self._repostate()
1178 1181 self._filtername = repo.filtername
1179 1182
1180 1183 def fetch(self):
1181 1184 """Refresh (if necessary) and return a repository.
1182 1185
1183 1186 If the cached instance is out of date, it will be recreated
1184 1187 automatically and returned.
1185 1188
1186 1189 Returns a tuple of the repo and a boolean indicating whether a new
1187 1190 repo instance was created.
1188 1191 """
1189 1192 # We compare the mtimes and sizes of some well-known files to
1190 1193 # determine if the repo changed. This is not precise, as mtimes
1191 1194 # are susceptible to clock skew and imprecise filesystems and
1192 1195 # file content can change while maintaining the same size.
1193 1196
1194 1197 state, mtime = self._repostate()
1195 1198 if state == self._state:
1196 1199 return self._repo, False
1197 1200
1198 1201 repo = repository(self._repo.baseui, self._repo.url())
1199 1202 if self._filtername:
1200 1203 self._repo = repo.filtered(self._filtername)
1201 1204 else:
1202 1205 self._repo = repo.unfiltered()
1203 1206 self._state = state
1204 1207 self.mtime = mtime
1205 1208
1206 1209 return self._repo, True
1207 1210
1208 1211 def _repostate(self):
1209 1212 state = []
1210 1213 maxmtime = -1
1211 1214 for attr, fname in foi:
1212 1215 prefix = getattr(self._repo, attr)
1213 1216 p = os.path.join(prefix, fname)
1214 1217 try:
1215 1218 st = os.stat(p)
1216 1219 except OSError:
1217 1220 st = os.stat(prefix)
1218 1221 state.append((st[stat.ST_MTIME], st.st_size))
1219 1222 maxmtime = max(maxmtime, st[stat.ST_MTIME])
1220 1223
1221 1224 return tuple(state), maxmtime
1222 1225
1223 1226 def copy(self):
1224 1227 """Obtain a copy of this class instance.
1225 1228
1226 1229 A new localrepository instance is obtained. The new instance should be
1227 1230 completely independent of the original.
1228 1231 """
1229 1232 repo = repository(self._repo.baseui, self._repo.origroot)
1230 1233 if self._filtername:
1231 1234 repo = repo.filtered(self._filtername)
1232 1235 else:
1233 1236 repo = repo.unfiltered()
1234 1237 c = cachedlocalrepo(repo)
1235 1238 c._state = self._state
1236 1239 c.mtime = self.mtime
1237 1240 return c
@@ -1,1010 +1,1012
1 1 # httppeer.py - HTTP repository proxy classes for mercurial
2 2 #
3 3 # Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
4 4 # Copyright 2006 Vadim Gelfer <vadim.gelfer@gmail.com>
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8
9 9 from __future__ import absolute_import
10 10
11 11 import errno
12 12 import io
13 13 import os
14 14 import socket
15 15 import struct
16 16 import weakref
17 17
18 18 from .i18n import _
19 from .interfaces import (
20 repository,
21 )
19 22 from . import (
20 23 bundle2,
21 24 error,
22 25 httpconnection,
23 26 pycompat,
24 repository,
25 27 statichttprepo,
26 28 url as urlmod,
27 29 util,
28 30 wireprotoframing,
29 31 wireprototypes,
30 32 wireprotov1peer,
31 33 wireprotov2peer,
32 34 wireprotov2server,
33 35 )
34 36 from .utils import (
35 37 cborutil,
36 38 interfaceutil,
37 39 stringutil,
38 40 )
39 41
40 42 httplib = util.httplib
41 43 urlerr = util.urlerr
42 44 urlreq = util.urlreq
43 45
44 46 def encodevalueinheaders(value, header, limit):
45 47 """Encode a string value into multiple HTTP headers.
46 48
47 49 ``value`` will be encoded into 1 or more HTTP headers with the names
48 50 ``header-<N>`` where ``<N>`` is an integer starting at 1. Each header
49 51 name + value will be at most ``limit`` bytes long.
50 52
51 53 Returns an iterable of 2-tuples consisting of header names and
52 54 values as native strings.
53 55 """
54 56 # HTTP Headers are ASCII. Python 3 requires them to be unicodes,
55 57 # not bytes. This function always takes bytes in as arguments.
56 58 fmt = pycompat.strurl(header) + r'-%s'
57 59 # Note: it is *NOT* a bug that the last bit here is a bytestring
58 60 # and not a unicode: we're just getting the encoded length anyway,
59 61 # and using an r-string to make it portable between Python 2 and 3
60 62 # doesn't work because then the \r is a literal backslash-r
61 63 # instead of a carriage return.
62 64 valuelen = limit - len(fmt % r'000') - len(': \r\n')
63 65 result = []
64 66
65 67 n = 0
66 68 for i in pycompat.xrange(0, len(value), valuelen):
67 69 n += 1
68 70 result.append((fmt % str(n), pycompat.strurl(value[i:i + valuelen])))
69 71
70 72 return result
71 73
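The size arithmetic above is easier to follow with concrete numbers. An
illustrative standalone sketch that mirrors the chunking (the header name,
value and limit are made up):

    header = 'X-HgArg'
    value = b'cmd=batch&heads=0123456789abcdef' * 8   # a long argument string
    limit = 64                                        # pretend header budget

    # room left for the value once 'X-HgArg-000' and ': \r\n' are accounted for
    valuelen = limit - len('X-HgArg-000') - len(': \r\n')   # 64 - 11 - 4 = 49
    chunks = [value[i:i + valuelen] for i in range(0, len(value), valuelen)]
    headers = [('%s-%d' % (header, n + 1), chunk)
               for n, chunk in enumerate(chunks)]
    # headers[0] == ('X-HgArg-1', <first 49 bytes>), headers[1] == ('X-HgArg-2', ...)
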
72 74 class _multifile(object):
73 75 def __init__(self, *fileobjs):
74 76 for f in fileobjs:
75 77 if not util.safehasattr(f, 'length'):
76 78 raise ValueError(
77 79 '_multifile only supports file objects that '
78 80 'have a length but this one does not:', type(f), f)
79 81 self._fileobjs = fileobjs
80 82 self._index = 0
81 83
82 84 @property
83 85 def length(self):
84 86 return sum(f.length for f in self._fileobjs)
85 87
86 88 def read(self, amt=None):
87 89 if amt <= 0:
88 90 return ''.join(f.read() for f in self._fileobjs)
89 91 parts = []
90 92 while amt and self._index < len(self._fileobjs):
91 93 parts.append(self._fileobjs[self._index].read(amt))
92 94 got = len(parts[-1])
93 95 if got < amt:
94 96 self._index += 1
95 97 amt -= got
96 98 return ''.join(parts)
97 99
98 100 def seek(self, offset, whence=os.SEEK_SET):
99 101 if whence != os.SEEK_SET:
100 102 raise NotImplementedError(
101 103 '_multifile does not support anything other'
102 104 ' than os.SEEK_SET for whence on seek()')
103 105 if offset != 0:
104 106 raise NotImplementedError(
105 107 '_multifile only supports seeking to start, but that '
106 108 'could be fixed if you need it')
107 109 for f in self._fileobjs:
108 110 f.seek(0)
109 111 self._index = 0
110 112
111 113 def makev1commandrequest(ui, requestbuilder, caps, capablefn,
112 114 repobaseurl, cmd, args):
113 115 """Make an HTTP request to run a command for a version 1 client.
114 116
115 117 ``caps`` is a set of known server capabilities. The value may be
116 118 None if capabilities are not yet known.
117 119
118 120 ``capablefn`` is a function to evaluate a capability.
119 121
120 122 ``cmd``, ``args``, and ``data`` define the command, its arguments, and
121 123 raw data to pass to it.
122 124 """
123 125 if cmd == 'pushkey':
124 126 args['data'] = ''
125 127 data = args.pop('data', None)
126 128 headers = args.pop('headers', {})
127 129
128 130 ui.debug("sending %s command\n" % cmd)
129 131 q = [('cmd', cmd)]
130 132 headersize = 0
131 133 # Important: don't use self.capable() here or else you end up
132 134 # with infinite recursion when trying to look up capabilities
133 135 # for the first time.
134 136 postargsok = caps is not None and 'httppostargs' in caps
135 137
136 138 # Send arguments via POST.
137 139 if postargsok and args:
138 140 strargs = urlreq.urlencode(sorted(args.items()))
139 141 if not data:
140 142 data = strargs
141 143 else:
142 144 if isinstance(data, bytes):
143 145 i = io.BytesIO(data)
144 146 i.length = len(data)
145 147 data = i
146 148 argsio = io.BytesIO(strargs)
147 149 argsio.length = len(strargs)
148 150 data = _multifile(argsio, data)
149 151 headers[r'X-HgArgs-Post'] = len(strargs)
150 152 elif args:
151 153 # Calling self.capable() can infinite loop if we are calling
152 154 # "capabilities". But that command should never accept wire
153 155 # protocol arguments. So this should never happen.
154 156 assert cmd != 'capabilities'
155 157 httpheader = capablefn('httpheader')
156 158 if httpheader:
157 159 headersize = int(httpheader.split(',', 1)[0])
158 160
159 161 # Send arguments via HTTP headers.
160 162 if headersize > 0:
161 163 # The headers can typically carry more data than the URL.
162 164 encargs = urlreq.urlencode(sorted(args.items()))
163 165 for header, value in encodevalueinheaders(encargs, 'X-HgArg',
164 166 headersize):
165 167 headers[header] = value
166 168 # Send arguments via query string (Mercurial <1.9).
167 169 else:
168 170 q += sorted(args.items())
169 171
170 172 qs = '?%s' % urlreq.urlencode(q)
171 173 cu = "%s%s" % (repobaseurl, qs)
172 174 size = 0
173 175 if util.safehasattr(data, 'length'):
174 176 size = data.length
175 177 elif data is not None:
176 178 size = len(data)
177 179 if data is not None and r'Content-Type' not in headers:
178 180 headers[r'Content-Type'] = r'application/mercurial-0.1'
179 181
180 182 # Tell the server we accept application/mercurial-0.2 and multiple
181 183 # compression formats if the server is capable of emitting those
182 184 # payloads.
183 185 # Note: Keep this set empty by default, as client advertisement of
184 186 # protocol parameters should only occur after the handshake.
185 187 protoparams = set()
186 188
187 189 mediatypes = set()
188 190 if caps is not None:
189 191 mt = capablefn('httpmediatype')
190 192 if mt:
191 193 protoparams.add('0.1')
192 194 mediatypes = set(mt.split(','))
193 195
194 196 protoparams.add('partial-pull')
195 197
196 198 if '0.2tx' in mediatypes:
197 199 protoparams.add('0.2')
198 200
199 201 if '0.2tx' in mediatypes and capablefn('compression'):
200 202 # We /could/ compare supported compression formats and prune
201 203 # non-mutually supported or error if nothing is mutually supported.
202 204 # For now, send the full list to the server and have it error.
203 205 comps = [e.wireprotosupport().name for e in
204 206 util.compengines.supportedwireengines(util.CLIENTROLE)]
205 207 protoparams.add('comp=%s' % ','.join(comps))
206 208
207 209 if protoparams:
208 210 protoheaders = encodevalueinheaders(' '.join(sorted(protoparams)),
209 211 'X-HgProto',
210 212 headersize or 1024)
211 213 for header, value in protoheaders:
212 214 headers[header] = value
213 215
214 216 varyheaders = []
215 217 for header in headers:
216 218 if header.lower().startswith(r'x-hg'):
217 219 varyheaders.append(header)
218 220
219 221 if varyheaders:
220 222 headers[r'Vary'] = r','.join(sorted(varyheaders))
221 223
222 224 req = requestbuilder(pycompat.strurl(cu), data, headers)
223 225
224 226 if data is not None:
225 227 ui.debug("sending %d bytes\n" % size)
226 228 req.add_unredirected_header(r'Content-Length', r'%d' % size)
227 229
228 230 return req, cu, qs
229 231
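The function above picks between three transports for command arguments. A small
standalone sketch of just that decision (the helper name is made up and the logic
is only a paraphrase of the branches above):

    def choose_transport(caps, args, headersize):
        # POST body when the server advertises httppostargs
        if caps is not None and 'httppostargs' in caps and args:
            return 'post body'
        # otherwise X-HgArg-* headers when a header budget was negotiated
        if args and headersize > 0:
            return 'x-hgarg headers'
        # otherwise fall back to the query string (pre-1.9 servers)
        return 'query string'

    assert choose_transport({'httppostargs'}, {'key': 'tip'}, 0) == 'post body'
    assert choose_transport(set(), {'key': 'tip'}, 8192) == 'x-hgarg headers'
    assert choose_transport(None, {'key': 'tip'}, 0) == 'query string'
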
230 232 def _reqdata(req):
231 233 """Get request data, if any. If no data, returns None."""
232 234 if pycompat.ispy3:
233 235 return req.data
234 236 if not req.has_data():
235 237 return None
236 238 return req.get_data()
237 239
238 240 def sendrequest(ui, opener, req):
239 241 """Send a prepared HTTP request.
240 242
241 243 Returns the response object.
242 244 """
243 245 dbg = ui.debug
244 246 if (ui.debugflag
245 247 and ui.configbool('devel', 'debug.peer-request')):
246 248 line = 'devel-peer-request: %s\n'
247 249 dbg(line % '%s %s' % (pycompat.bytesurl(req.get_method()),
248 250 pycompat.bytesurl(req.get_full_url())))
249 251 hgargssize = None
250 252
251 253 for header, value in sorted(req.header_items()):
252 254 header = pycompat.bytesurl(header)
253 255 value = pycompat.bytesurl(value)
254 256 if header.startswith('X-hgarg-'):
255 257 if hgargssize is None:
256 258 hgargssize = 0
257 259 hgargssize += len(value)
258 260 else:
259 261 dbg(line % ' %s %s' % (header, value))
260 262
261 263 if hgargssize is not None:
262 264 dbg(line % ' %d bytes of commands arguments in headers'
263 265 % hgargssize)
264 266 data = _reqdata(req)
265 267 if data is not None:
266 268 length = getattr(data, 'length', None)
267 269 if length is None:
268 270 length = len(data)
269 271 dbg(line % ' %d bytes of data' % length)
270 272
271 273 start = util.timer()
272 274
273 275 res = None
274 276 try:
275 277 res = opener.open(req)
276 278 except urlerr.httperror as inst:
277 279 if inst.code == 401:
278 280 raise error.Abort(_('authorization failed'))
279 281 raise
280 282 except httplib.HTTPException as inst:
281 283 ui.debug('http error requesting %s\n' %
282 284 util.hidepassword(req.get_full_url()))
283 285 ui.traceback()
284 286 raise IOError(None, inst)
285 287 finally:
286 288 if ui.debugflag and ui.configbool('devel', 'debug.peer-request'):
287 289 code = res.code if res else -1
288 290 dbg(line % ' finished in %.4f seconds (%d)'
289 291 % (util.timer() - start, code))
290 292
291 293 # Insert error handlers for common I/O failures.
292 294 urlmod.wrapresponse(res)
293 295
294 296 return res
295 297
296 298 class RedirectedRepoError(error.RepoError):
297 299 def __init__(self, msg, respurl):
298 300 super(RedirectedRepoError, self).__init__(msg)
299 301 self.respurl = respurl
300 302
301 303 def parsev1commandresponse(ui, baseurl, requrl, qs, resp, compressible,
302 304 allowcbor=False):
303 305 # record the url we got redirected to
304 306 redirected = False
305 307 respurl = pycompat.bytesurl(resp.geturl())
306 308 if respurl.endswith(qs):
307 309 respurl = respurl[:-len(qs)]
308 310 qsdropped = False
309 311 else:
310 312 qsdropped = True
311 313
312 314 if baseurl.rstrip('/') != respurl.rstrip('/'):
313 315 redirected = True
314 316 if not ui.quiet:
315 317 ui.warn(_('real URL is %s\n') % respurl)
316 318
317 319 try:
318 320 proto = pycompat.bytesurl(resp.getheader(r'content-type', r''))
319 321 except AttributeError:
320 322 proto = pycompat.bytesurl(resp.headers.get(r'content-type', r''))
321 323
322 324 safeurl = util.hidepassword(baseurl)
323 325 if proto.startswith('application/hg-error'):
324 326 raise error.OutOfBandError(resp.read())
325 327
326 328 # Pre 1.0 versions of Mercurial used text/plain and
327 329 # application/hg-changegroup. We don't support such old servers.
328 330 if not proto.startswith('application/mercurial-'):
329 331 ui.debug("requested URL: '%s'\n" % util.hidepassword(requrl))
330 332 msg = _("'%s' does not appear to be an hg repository:\n"
331 333 "---%%<--- (%s)\n%s\n---%%<---\n") % (
332 334 safeurl, proto or 'no content-type', resp.read(1024))
333 335
334 336 # Some servers may strip the query string from the redirect. We
335 337 # raise a special error type so callers can react to this specially.
336 338 if redirected and qsdropped:
337 339 raise RedirectedRepoError(msg, respurl)
338 340 else:
339 341 raise error.RepoError(msg)
340 342
341 343 try:
342 344 subtype = proto.split('-', 1)[1]
343 345
344 346 # Unless we end up supporting CBOR in the legacy wire protocol,
345 347 # this should ONLY be encountered for the initial capabilities
346 348 # request during handshake.
347 349 if subtype == 'cbor':
348 350 if allowcbor:
349 351 return respurl, proto, resp
350 352 else:
351 353 raise error.RepoError(_('unexpected CBOR response from '
352 354 'server'))
353 355
354 356 version_info = tuple([int(n) for n in subtype.split('.')])
355 357 except ValueError:
356 358 raise error.RepoError(_("'%s' sent a broken Content-Type "
357 359 "header (%s)") % (safeurl, proto))
358 360
359 361 # TODO consider switching to a decompression reader that uses
360 362 # generators.
361 363 if version_info == (0, 1):
362 364 if compressible:
363 365 resp = util.compengines['zlib'].decompressorreader(resp)
364 366
365 367 elif version_info == (0, 2):
366 368 # application/mercurial-0.2 always identifies the compression
367 369 # engine in the payload header.
368 370 elen = struct.unpack('B', util.readexactly(resp, 1))[0]
369 371 ename = util.readexactly(resp, elen)
370 372 engine = util.compengines.forwiretype(ename)
371 373
372 374 resp = engine.decompressorreader(resp)
373 375 else:
374 376 raise error.RepoError(_("'%s' uses newer protocol %s") %
375 377 (safeurl, subtype))
376 378
377 379 return respurl, proto, resp
378 380
379 381 class httppeer(wireprotov1peer.wirepeer):
380 382 def __init__(self, ui, path, url, opener, requestbuilder, caps):
381 383 self.ui = ui
382 384 self._path = path
383 385 self._url = url
384 386 self._caps = caps
385 387 self.limitedarguments = caps is not None and 'httppostargs' not in caps
386 388 self._urlopener = opener
387 389 self._requestbuilder = requestbuilder
388 390
389 391 def __del__(self):
390 392 for h in self._urlopener.handlers:
391 393 h.close()
392 394 getattr(h, "close_all", lambda: None)()
393 395
394 396 # Begin of ipeerconnection interface.
395 397
396 398 def url(self):
397 399 return self._path
398 400
399 401 def local(self):
400 402 return None
401 403
402 404 def peer(self):
403 405 return self
404 406
405 407 def canpush(self):
406 408 return True
407 409
408 410 def close(self):
409 411 try:
410 412 reqs, sent, recv = (self._urlopener.requestscount,
411 413 self._urlopener.sentbytescount,
412 414 self._urlopener.receivedbytescount)
413 415 except AttributeError:
414 416 return
415 417 self.ui.note(_('(sent %d HTTP requests and %d bytes; '
416 418 'received %d bytes in responses)\n') %
417 419 (reqs, sent, recv))
418 420
419 421 # End of ipeerconnection interface.
420 422
421 423 # Begin of ipeercommands interface.
422 424
423 425 def capabilities(self):
424 426 return self._caps
425 427
426 428 # End of ipeercommands interface.
427 429
428 430 def _callstream(self, cmd, _compressible=False, **args):
429 431 args = pycompat.byteskwargs(args)
430 432
431 433 req, cu, qs = makev1commandrequest(self.ui, self._requestbuilder,
432 434 self._caps, self.capable,
433 435 self._url, cmd, args)
434 436
435 437 resp = sendrequest(self.ui, self._urlopener, req)
436 438
437 439 self._url, ct, resp = parsev1commandresponse(self.ui, self._url, cu, qs,
438 440 resp, _compressible)
439 441
440 442 return resp
441 443
442 444 def _call(self, cmd, **args):
443 445 fp = self._callstream(cmd, **args)
444 446 try:
445 447 return fp.read()
446 448 finally:
447 449 # if using keepalive, allow connection to be reused
448 450 fp.close()
449 451
450 452 def _callpush(self, cmd, cg, **args):
451 453 # have to stream bundle to a temp file because we do not have
452 454 # http 1.1 chunked transfer.
453 455
454 456 types = self.capable('unbundle')
455 457 try:
456 458 types = types.split(',')
457 459 except AttributeError:
458 460 # servers older than d1b16a746db6 will send 'unbundle' as a
459 461 # boolean capability. They only support headerless/uncompressed
460 462 # bundles.
461 463 types = [""]
462 464 for x in types:
463 465 if x in bundle2.bundletypes:
464 466 type = x
465 467 break
466 468
467 469 tempname = bundle2.writebundle(self.ui, cg, None, type)
468 470 fp = httpconnection.httpsendfile(self.ui, tempname, "rb")
469 471 headers = {r'Content-Type': r'application/mercurial-0.1'}
470 472
471 473 try:
472 474 r = self._call(cmd, data=fp, headers=headers, **args)
473 475 vals = r.split('\n', 1)
474 476 if len(vals) < 2:
475 477 raise error.ResponseError(_("unexpected response:"), r)
476 478 return vals
477 479 except urlerr.httperror:
478 480 # Catch and re-raise these so we don't try and treat them
479 481 # like generic socket errors. They lack any values in
480 482 # .args on Python 3 which breaks our socket.error block.
481 483 raise
482 484 except socket.error as err:
483 485 if err.args[0] in (errno.ECONNRESET, errno.EPIPE):
484 486 raise error.Abort(_('push failed: %s') % err.args[1])
485 487 raise error.Abort(err.args[1])
486 488 finally:
487 489 fp.close()
488 490 os.unlink(tempname)
489 491
490 492 def _calltwowaystream(self, cmd, fp, **args):
491 493 fh = None
492 494 fp_ = None
493 495 filename = None
494 496 try:
495 497 # dump bundle to disk
496 498 fd, filename = pycompat.mkstemp(prefix="hg-bundle-", suffix=".hg")
497 499 fh = os.fdopen(fd, r"wb")
498 500 d = fp.read(4096)
499 501 while d:
500 502 fh.write(d)
501 503 d = fp.read(4096)
502 504 fh.close()
503 505 # start http push
504 506 fp_ = httpconnection.httpsendfile(self.ui, filename, "rb")
505 507 headers = {r'Content-Type': r'application/mercurial-0.1'}
506 508 return self._callstream(cmd, data=fp_, headers=headers, **args)
507 509 finally:
508 510 if fp_ is not None:
509 511 fp_.close()
510 512 if fh is not None:
511 513 fh.close()
512 514 os.unlink(filename)
513 515
514 516 def _callcompressable(self, cmd, **args):
515 517 return self._callstream(cmd, _compressible=True, **args)
516 518
517 519 def _abort(self, exception):
518 520 raise exception
519 521
520 522 def sendv2request(ui, opener, requestbuilder, apiurl, permission, requests,
521 523 redirect):
522 524 wireprotoframing.populatestreamencoders()
523 525
524 526 uiencoders = ui.configlist(b'experimental', b'httppeer.v2-encoder-order')
525 527
526 528 if uiencoders:
527 529 encoders = []
528 530
529 531 for encoder in uiencoders:
530 532 if encoder not in wireprotoframing.STREAM_ENCODERS:
531 533 ui.warn(_(b'wire protocol version 2 encoder referenced in '
532 534 b'config (%s) is not known; ignoring\n') % encoder)
533 535 else:
534 536 encoders.append(encoder)
535 537
536 538 else:
537 539 encoders = wireprotoframing.STREAM_ENCODERS_ORDER
538 540
539 541 reactor = wireprotoframing.clientreactor(ui,
540 542 hasmultiplesend=False,
541 543 buffersends=True,
542 544 clientcontentencoders=encoders)
543 545
544 546 handler = wireprotov2peer.clienthandler(ui, reactor,
545 547 opener=opener,
546 548 requestbuilder=requestbuilder)
547 549
548 550 url = '%s/%s' % (apiurl, permission)
549 551
550 552 if len(requests) > 1:
551 553 url += '/multirequest'
552 554 else:
553 555 url += '/%s' % requests[0][0]
554 556
555 557 ui.debug('sending %d commands\n' % len(requests))
556 558 for command, args, f in requests:
557 559 ui.debug('sending command %s: %s\n' % (
558 560 command, stringutil.pprint(args, indent=2)))
559 561 assert not list(handler.callcommand(command, args, f,
560 562 redirect=redirect))
561 563
562 564 # TODO stream this.
563 565 body = b''.join(map(bytes, handler.flushcommands()))
564 566
565 567 # TODO modify user-agent to reflect v2
566 568 headers = {
567 569 r'Accept': wireprotov2server.FRAMINGTYPE,
568 570 r'Content-Type': wireprotov2server.FRAMINGTYPE,
569 571 }
570 572
571 573 req = requestbuilder(pycompat.strurl(url), body, headers)
572 574 req.add_unredirected_header(r'Content-Length', r'%d' % len(body))
573 575
574 576 try:
575 577 res = opener.open(req)
576 578 except urlerr.httperror as e:
577 579 if e.code == 401:
578 580 raise error.Abort(_('authorization failed'))
579 581
580 582 raise
581 583 except httplib.HTTPException as e:
582 584 ui.traceback()
583 585 raise IOError(None, e)
584 586
585 587 return handler, res
586 588
587 589 class queuedcommandfuture(pycompat.futures.Future):
588 590 """Wraps result() on command futures to trigger submission on call."""
589 591
590 592 def result(self, timeout=None):
591 593 if self.done():
592 594 return pycompat.futures.Future.result(self, timeout)
593 595
594 596 self._peerexecutor.sendcommands()
595 597
596 598 # sendcommands() will restore the original __class__ and self.result
597 599 # will resolve to Future.result.
598 600 return self.result(timeout)
599 601
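The ``__class__`` swap is the whole trick here: a queued future forces the executor
to flush pending commands the first time result() is called, and sendcommands()
restores the plain Future type before resolving it. A standalone toy sketch of the
same pattern (the executor below is made up and unrelated to the real httpv2executor):

    import concurrent.futures as futures

    class queuedfuture(futures.Future):
        def result(self, timeout=None):
            if self.done():
                return futures.Future.result(self, timeout)
            self._peerexecutor.sendcommands()
            # sendcommands() restored __class__, so this resolves normally
            return self.result(timeout)

    class toyexecutor(object):
        def __init__(self):
            self._calls = []
        def callcommand(self, name, args):
            f = futures.Future()
            f.__class__ = queuedfuture      # lazy submission on result()
            f._peerexecutor = self
            self._calls.append((name, args, f))
            return f
        def sendcommands(self):
            for name, args, f in self._calls:
                f.__class__ = futures.Future  # un-hack before resolving
                f._peerexecutor = None
                f.set_result((name, args))
            self._calls = []

    ex = toyexecutor()
    fut = ex.callcommand('heads', {})
    assert fut.result() == ('heads', {})    # triggers sendcommands() implicitly
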
600 602 @interfaceutil.implementer(repository.ipeercommandexecutor)
601 603 class httpv2executor(object):
602 604 def __init__(self, ui, opener, requestbuilder, apiurl, descriptor,
603 605 redirect):
604 606 self._ui = ui
605 607 self._opener = opener
606 608 self._requestbuilder = requestbuilder
607 609 self._apiurl = apiurl
608 610 self._descriptor = descriptor
609 611 self._redirect = redirect
610 612 self._sent = False
611 613 self._closed = False
612 614 self._neededpermissions = set()
613 615 self._calls = []
614 616 self._futures = weakref.WeakSet()
615 617 self._responseexecutor = None
616 618 self._responsef = None
617 619
618 620 def __enter__(self):
619 621 return self
620 622
621 623 def __exit__(self, exctype, excvalue, exctb):
622 624 self.close()
623 625
624 626 def callcommand(self, command, args):
625 627 if self._sent:
626 628 raise error.ProgrammingError('callcommand() cannot be used after '
627 629 'commands are sent')
628 630
629 631 if self._closed:
630 632 raise error.ProgrammingError('callcommand() cannot be used after '
631 633 'close()')
632 634
633 635 # The service advertises which commands are available. So if we attempt
634 636 # to call an unknown command or pass an unknown argument, we can screen
635 637 # for this.
636 638 if command not in self._descriptor['commands']:
637 639 raise error.ProgrammingError(
638 640 'wire protocol command %s is not available' % command)
639 641
640 642 cmdinfo = self._descriptor['commands'][command]
641 643 unknownargs = set(args.keys()) - set(cmdinfo.get('args', {}))
642 644
643 645 if unknownargs:
644 646 raise error.ProgrammingError(
645 647 'wire protocol command %s does not accept argument: %s' % (
646 648 command, ', '.join(sorted(unknownargs))))
647 649
648 650 self._neededpermissions |= set(cmdinfo['permissions'])
649 651
650 652 # TODO we /could/ also validate types here, since the API descriptor
651 653 # includes types...
652 654
653 655 f = pycompat.futures.Future()
654 656
655 657 # Monkeypatch it so result() triggers sendcommands(), otherwise result()
656 658 # could deadlock.
657 659 f.__class__ = queuedcommandfuture
658 660 f._peerexecutor = self
659 661
660 662 self._futures.add(f)
661 663 self._calls.append((command, args, f))
662 664
663 665 return f
664 666
665 667 def sendcommands(self):
666 668 if self._sent:
667 669 return
668 670
669 671 if not self._calls:
670 672 return
671 673
672 674 self._sent = True
673 675
674 676 # Unhack any future types so caller sees a clean type and so we
675 677 # break reference cycle.
676 678 for f in self._futures:
677 679 if isinstance(f, queuedcommandfuture):
678 680 f.__class__ = pycompat.futures.Future
679 681 f._peerexecutor = None
680 682
681 683 # Mark the future as running and filter out cancelled futures.
682 684 calls = [(command, args, f)
683 685 for command, args, f in self._calls
684 686 if f.set_running_or_notify_cancel()]
685 687
686 688 # Clear out references, prevent improper object usage.
687 689 self._calls = None
688 690
689 691 if not calls:
690 692 return
691 693
692 694 permissions = set(self._neededpermissions)
693 695
694 696 if 'push' in permissions and 'pull' in permissions:
695 697 permissions.remove('pull')
696 698
697 699 if len(permissions) > 1:
698 700 raise error.RepoError(_('cannot make request requiring multiple '
699 701 'permissions: %s') %
700 702 _(', ').join(sorted(permissions)))
701 703
702 704 permission = {
703 705 'push': 'rw',
704 706 'pull': 'ro',
705 707 }[permissions.pop()]
706 708
707 709 handler, resp = sendv2request(
708 710 self._ui, self._opener, self._requestbuilder, self._apiurl,
709 711 permission, calls, self._redirect)
710 712
711 713 # TODO we probably want to validate the HTTP code, media type, etc.
712 714
713 715 self._responseexecutor = pycompat.futures.ThreadPoolExecutor(1)
714 716 self._responsef = self._responseexecutor.submit(self._handleresponse,
715 717 handler, resp)
716 718
717 719 def close(self):
718 720 if self._closed:
719 721 return
720 722
721 723 self.sendcommands()
722 724
723 725 self._closed = True
724 726
725 727 if not self._responsef:
726 728 return
727 729
728 730 # TODO ^C here may not result in immediate program termination.
729 731
730 732 try:
731 733 self._responsef.result()
732 734 finally:
733 735 self._responseexecutor.shutdown(wait=True)
734 736 self._responsef = None
735 737 self._responseexecutor = None
736 738
737 739 # If any of our futures are still in progress, mark them as
738 740 # errored, otherwise a result() could wait indefinitely.
739 741 for f in self._futures:
740 742 if not f.done():
741 743 f.set_exception(error.ResponseError(
742 744 _('unfulfilled command response')))
743 745
744 746 self._futures = None
745 747
746 748 def _handleresponse(self, handler, resp):
747 749 # Called in a thread to read the response.
748 750
749 751 while handler.readdata(resp):
750 752 pass
751 753
752 754 @interfaceutil.implementer(repository.ipeerv2)
753 755 class httpv2peer(object):
754 756
755 757 limitedarguments = False
756 758
757 759 def __init__(self, ui, repourl, apipath, opener, requestbuilder,
758 760 apidescriptor):
759 761 self.ui = ui
760 762 self.apidescriptor = apidescriptor
761 763
762 764 if repourl.endswith('/'):
763 765 repourl = repourl[:-1]
764 766
765 767 self._url = repourl
766 768 self._apipath = apipath
767 769 self._apiurl = '%s/%s' % (repourl, apipath)
768 770 self._opener = opener
769 771 self._requestbuilder = requestbuilder
770 772
771 773 self._redirect = wireprotov2peer.supportedredirects(ui, apidescriptor)
772 774
773 775 # Start of ipeerconnection.
774 776
775 777 def url(self):
776 778 return self._url
777 779
778 780 def local(self):
779 781 return None
780 782
781 783 def peer(self):
782 784 return self
783 785
784 786 def canpush(self):
785 787 # TODO change once implemented.
786 788 return False
787 789
788 790 def close(self):
789 791 self.ui.note(_('(sent %d HTTP requests and %d bytes; '
790 792 'received %d bytes in responses)\n') %
791 793 (self._opener.requestscount,
792 794 self._opener.sentbytescount,
793 795 self._opener.receivedbytescount))
794 796
795 797 # End of ipeerconnection.
796 798
797 799 # Start of ipeercapabilities.
798 800
799 801 def capable(self, name):
800 802 # The capabilities used internally historically map to capabilities
801 803 # advertised from the "capabilities" wire protocol command. However,
802 804 # version 2 of that command works differently.
803 805
804 806 # Maps to commands that are available.
805 807 if name in ('branchmap', 'getbundle', 'known', 'lookup', 'pushkey'):
806 808 return True
807 809
808 810 # Other concepts.
809 811 if name in ('bundle2',):
810 812 return True
811 813
812 814 # Alias command-* to presence of command of that name.
813 815 if name.startswith('command-'):
814 816 return name[len('command-'):] in self.apidescriptor['commands']
815 817
816 818 return False
817 819
818 820 def requirecap(self, name, purpose):
819 821 if self.capable(name):
820 822 return
821 823
822 824 raise error.CapabilityError(
823 825 _('cannot %s; client or remote repository does not support the '
824 826 '\'%s\' capability') % (purpose, name))
825 827
826 828 # End of ipeercapabilities.
827 829
828 830 def _call(self, name, **args):
829 831 with self.commandexecutor() as e:
830 832 return e.callcommand(name, args).result()
831 833
832 834 def commandexecutor(self):
833 835 return httpv2executor(self.ui, self._opener, self._requestbuilder,
834 836 self._apiurl, self.apidescriptor, self._redirect)
835 837
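The ``command-`` alias handling in ``capable()`` above can be read as a small pure function over the API descriptor obtained during the handshake. A minimal sketch (``commandavailable`` is illustrative, not a real helper in this module):

def commandavailable(apidescriptor, name):
    # Mirrors the ``command-*`` alias check in capable() above.
    # ``apidescriptor`` is the decoded descriptor from the handshake; its
    # 'commands' key maps command names to metadata.
    prefix = 'command-'
    if name.startswith(prefix):
        return name[len(prefix):] in apidescriptor['commands']
    return False

# commandavailable({'commands': {'heads': {}}}, 'command-heads') -> True
# commandavailable({'commands': {'heads': {}}}, 'command-known') -> False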
836 838 # Registry of API service names to metadata about peers that handle it.
837 839 #
838 840 # The following keys are meaningful:
839 841 #
840 842 # init
841 843 # Callable receiving (ui, repourl, servicepath, opener, requestbuilder,
842 844 # apidescriptor) to create a peer.
843 845 #
844 846 # priority
845 847 # Integer priority for the service. If we could choose from multiple
846 848 # services, we choose the one with the highest priority.
847 849 API_PEERS = {
848 850 wireprototypes.HTTP_WIREPROTO_V2: {
849 851 'init': httpv2peer,
850 852 'priority': 50,
851 853 },
852 854 }
853 855
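For illustration, the selection that ``makepeer()`` below performs over this registry amounts to intersecting the server-advertised services with ``API_PEERS`` and taking the highest priority. A hedged sketch (not a helper defined in this module):

def choosebestservice(advertised, registry):
    # ``advertised`` is the set of API names offered by the server;
    # ``registry`` has the same shape as API_PEERS. Returns the mutually
    # supported service with the highest priority, or None.
    choices = set(advertised) & set(registry)
    if not choices:
        return None
    return max(choices, key=lambda name: registry[name]['priority'])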
854 856 def performhandshake(ui, url, opener, requestbuilder):
855 857 # The handshake is a request to the capabilities command.
856 858
857 859 caps = None
858 860 def capable(x):
859 861 raise error.ProgrammingError('should not be called')
860 862
861 863 args = {}
862 864
863 865 # The client advertises support for newer protocols by adding an
864 866 # X-HgUpgrade-* header with a list of supported APIs and an
865 867 # X-HgProto-* header advertising which serializing formats it supports.
866 868 # We only support the HTTP version 2 transport and CBOR responses for
867 869 # now.
868 870 advertisev2 = ui.configbool('experimental', 'httppeer.advertise-v2')
869 871
870 872 if advertisev2:
871 873 args['headers'] = {
872 874 r'X-HgProto-1': r'cbor',
873 875 }
874 876
875 877 args['headers'].update(
876 878 encodevalueinheaders(' '.join(sorted(API_PEERS)),
877 879 'X-HgUpgrade',
878 880 # We don't know the header limit this early.
879 881 # So make it small.
880 882 1024))
881 883
882 884 req, requrl, qs = makev1commandrequest(ui, requestbuilder, caps,
883 885 capable, url, 'capabilities',
884 886 args)
885 887 resp = sendrequest(ui, opener, req)
886 888
887 889 # The server may redirect us to the repo root, stripping the
888 890 # ?cmd=capabilities query string from the URL. The server would likely
889 891 # return HTML in this case and ``parsev1commandresponse()`` would raise.
890 892 # We catch this special case and re-issue the capabilities request against
891 893 # the new URL.
892 894 #
893 895 # We should ideally not do this, as a redirect that drops the query
894 896 # string from the URL is arguably a server bug. (Garbage in, garbage out).
895 897 # However, Mercurial clients for several years appeared to handle this
896 898 # issue without behavior degradation. And according to issue 5860, it may
897 899 # be a longstanding bug in some server implementations. So we allow a
898 900 # redirect that drops the query string to "just work."
899 901 try:
900 902 respurl, ct, resp = parsev1commandresponse(ui, url, requrl, qs, resp,
901 903 compressible=False,
902 904 allowcbor=advertisev2)
903 905 except RedirectedRepoError as e:
904 906 req, requrl, qs = makev1commandrequest(ui, requestbuilder, caps,
905 907 capable, e.respurl,
906 908 'capabilities', args)
907 909 resp = sendrequest(ui, opener, req)
908 910 respurl, ct, resp = parsev1commandresponse(ui, url, requrl, qs, resp,
909 911 compressible=False,
910 912 allowcbor=advertisev2)
911 913
912 914 try:
913 915 rawdata = resp.read()
914 916 finally:
915 917 resp.close()
916 918
917 919 if not ct.startswith('application/mercurial-'):
918 920 raise error.ProgrammingError('unexpected content-type: %s' % ct)
919 921
920 922 if advertisev2:
921 923 if ct == 'application/mercurial-cbor':
922 924 try:
923 925 info = cborutil.decodeall(rawdata)[0]
924 926 except cborutil.CBORDecodeError:
925 927 raise error.Abort(_('error decoding CBOR from remote server'),
926 928 hint=_('try again and consider contacting '
927 929 'the server operator'))
928 930
929 931 # We got a legacy response. That's fine.
930 932 elif ct in ('application/mercurial-0.1', 'application/mercurial-0.2'):
931 933 info = {
932 934 'v1capabilities': set(rawdata.split())
933 935 }
934 936
935 937 else:
936 938 raise error.RepoError(
937 939 _('unexpected response type from server: %s') % ct)
938 940 else:
939 941 info = {
940 942 'v1capabilities': set(rawdata.split())
941 943 }
942 944
943 945 return respurl, info
944 946
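As a hedged reduction of the content-type handling above, the handshake response body is interpreted roughly as follows (``parsehandshakebody`` is illustrative and omits the HTTP plumbing and error hints):

from mercurial.utils import cborutil

def parsehandshakebody(ct, rawdata):
    # CBOR responses carry the structured API descriptor; legacy responses
    # carry a space-separated capability list.
    if ct == 'application/mercurial-cbor':
        return cborutil.decodeall(rawdata)[0]
    if ct in ('application/mercurial-0.1', 'application/mercurial-0.2'):
        return {'v1capabilities': set(rawdata.split())}
    raise ValueError('unexpected response type from server: %s' % ct)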
945 947 def makepeer(ui, path, opener=None, requestbuilder=urlreq.request):
946 948 """Construct an appropriate HTTP peer instance.
947 949
948 950 ``opener`` is an ``url.opener`` that should be used to establish
949 951 connections, perform HTTP requests.
950 952
951 953 ``requestbuilder`` is the type used for constructing HTTP requests.
952 954 It exists as an argument so extensions can override the default.
953 955 """
954 956 u = util.url(path)
955 957 if u.query or u.fragment:
956 958 raise error.Abort(_('unsupported URL component: "%s"') %
957 959 (u.query or u.fragment))
958 960
959 961 # urllib cannot handle URLs with embedded user or passwd.
960 962 url, authinfo = u.authinfo()
961 963 ui.debug('using %s\n' % url)
962 964
963 965 opener = opener or urlmod.opener(ui, authinfo)
964 966
965 967 respurl, info = performhandshake(ui, url, opener, requestbuilder)
966 968
967 969 # Given the intersection of APIs that both we and the server support,
968 970 # sort by their advertised priority and pick the first one.
969 971 #
970 972 # TODO consider making this request-based and interface driven. For
971 973 # example, the caller could say "I want a peer that does X." It's quite
972 974 # possible that not all peers would do that. Since we know the service
973 975 # capabilities, we could filter out services not meeting the
974 976 # requirements. Possibly by consulting the interfaces defined by the
975 977 # peer type.
976 978 apipeerchoices = set(info.get('apis', {}).keys()) & set(API_PEERS.keys())
977 979
978 980 preferredchoices = sorted(apipeerchoices,
979 981 key=lambda x: API_PEERS[x]['priority'],
980 982 reverse=True)
981 983
982 984 for service in preferredchoices:
983 985 apipath = '%s/%s' % (info['apibase'].rstrip('/'), service)
984 986
985 987 return API_PEERS[service]['init'](ui, respurl, apipath, opener,
986 988 requestbuilder,
987 989 info['apis'][service])
988 990
989 991 # Failed to construct an API peer. Fall back to legacy.
990 992 return httppeer(ui, path, respurl, opener, requestbuilder,
991 993 info['v1capabilities'])
992 994
993 995 def instance(ui, path, create, intents=None, createopts=None):
994 996 if create:
995 997 raise error.Abort(_('cannot create new http repository'))
996 998 try:
997 999 if path.startswith('https:') and not urlmod.has_https:
998 1000 raise error.Abort(_('Python support for SSL and HTTPS '
999 1001 'is not installed'))
1000 1002
1001 1003 inst = makepeer(ui, path)
1002 1004
1003 1005 return inst
1004 1006 except error.RepoError as httpexception:
1005 1007 try:
1006 1008 r = statichttprepo.instance(ui, "static-" + path, create)
1007 1009 ui.note(_('(falling back to static-http)\n'))
1008 1010 return r
1009 1011 except error.RepoError:
1010 1012 raise httpexception # use the original http RepoError instead
@@ -1,1877 +1,1877
1 1 # repository.py - Interfaces and base classes for repositories and peers.
2 2 #
3 3 # Copyright 2017 Gregory Szorc <gregory.szorc@gmail.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 from .i18n import _
11 from . import (
10 from ..i18n import _
11 from .. import (
12 12 error,
13 13 )
14 from .utils import (
14 from ..utils import (
15 15 interfaceutil,
16 16 )
17 17
18 18 # When narrowing is finalized and no longer subject to format changes,
19 19 # we should move this to just "narrow" or similar.
20 20 NARROW_REQUIREMENT = 'narrowhg-experimental'
21 21
22 22 # Local repository feature string.
23 23
24 24 # Revlogs are being used for file storage.
25 25 REPO_FEATURE_REVLOG_FILE_STORAGE = b'revlogfilestorage'
26 26 # The storage part of the repository is shared from an external source.
27 27 REPO_FEATURE_SHARED_STORAGE = b'sharedstore'
28 28 # LFS supported for backing file storage.
29 29 REPO_FEATURE_LFS = b'lfs'
30 30 # Repository supports being stream cloned.
31 31 REPO_FEATURE_STREAM_CLONE = b'streamclone'
32 32 # Files storage may lack data for all ancestors.
33 33 REPO_FEATURE_SHALLOW_FILE_STORAGE = b'shallowfilestorage'
34 34
35 35 REVISION_FLAG_CENSORED = 1 << 15
36 36 REVISION_FLAG_ELLIPSIS = 1 << 14
37 37 REVISION_FLAG_EXTSTORED = 1 << 13
38 38
39 39 REVISION_FLAGS_KNOWN = (
40 40 REVISION_FLAG_CENSORED | REVISION_FLAG_ELLIPSIS | REVISION_FLAG_EXTSTORED)
41 41
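A brief sketch of how these flag constants are intended to be combined and tested (the helper names are illustrative, not part of this module):

def flagiscensored(flags):
    # True if the censored bit is set on a revision's flags.
    return bool(flags & REVISION_FLAG_CENSORED)

def checkknownflags(flags):
    # Reject flag bits outside the set this module knows about.
    unknown = flags & ~REVISION_FLAGS_KNOWN
    if unknown:
        raise ValueError('unknown revision flags: %#x' % unknown)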
42 42 CG_DELTAMODE_STD = b'default'
43 43 CG_DELTAMODE_PREV = b'previous'
44 44 CG_DELTAMODE_FULL = b'fulltext'
45 45 CG_DELTAMODE_P1 = b'p1'
46 46
47 47 class ipeerconnection(interfaceutil.Interface):
48 48 """Represents a "connection" to a repository.
49 49
50 50 This is the base interface for representing a connection to a repository.
51 51 It holds basic properties and methods applicable to all peer types.
52 52
53 53 This is not a complete interface definition and should not be used
54 54 outside of this module.
55 55 """
56 56 ui = interfaceutil.Attribute("""ui.ui instance""")
57 57
58 58 def url():
59 59 """Returns a URL string representing this peer.
60 60
61 61 Currently, implementations expose the raw URL used to construct the
62 62 instance. It may contain credentials as part of the URL. The
63 63 expectations of the value aren't well-defined and this could lead to
64 64 data leakage.
65 65
66 66 TODO audit/clean consumers and more clearly define the contents of this
67 67 value.
68 68 """
69 69
70 70 def local():
71 71 """Returns a local repository instance.
72 72
73 73 If the peer represents a local repository, returns an object that
74 74 can be used to interface with it. Otherwise returns ``None``.
75 75 """
76 76
77 77 def peer():
78 78 """Returns an object conforming to this interface.
79 79
80 80 Most implementations will ``return self``.
81 81 """
82 82
83 83 def canpush():
84 84 """Returns a boolean indicating if this peer can be pushed to."""
85 85
86 86 def close():
87 87 """Close the connection to this peer.
88 88
89 89 This is called when the peer will no longer be used. Resources
90 90 associated with the peer should be cleaned up.
91 91 """
92 92
93 93 class ipeercapabilities(interfaceutil.Interface):
94 94 """Peer sub-interface related to capabilities."""
95 95
96 96 def capable(name):
97 97 """Determine support for a named capability.
98 98
99 99 Returns ``False`` if capability not supported.
100 100
101 101 Returns ``True`` if boolean capability is supported. Returns a string
102 102 if capability support is non-boolean.
103 103
104 104 Capability strings may or may not map to wire protocol capabilities.
105 105 """
106 106
107 107 def requirecap(name, purpose):
108 108 """Require a capability to be present.
109 109
110 110 Raises a ``CapabilityError`` if the capability isn't present.
111 111 """
112 112
113 113 class ipeercommands(interfaceutil.Interface):
114 114 """Client-side interface for communicating over the wire protocol.
115 115
116 116 This interface is used as a gateway to the Mercurial wire protocol.
117 117 Methods commonly call wire protocol commands of the same name.
118 118 """
119 119
120 120 def branchmap():
121 121 """Obtain heads in named branches.
122 122
123 123 Returns a dict mapping branch name to an iterable of nodes that are
124 124 heads on that branch.
125 125 """
126 126
127 127 def capabilities():
128 128 """Obtain capabilities of the peer.
129 129
130 130 Returns a set of string capabilities.
131 131 """
132 132
133 133 def clonebundles():
134 134 """Obtains the clone bundles manifest for the repo.
135 135
136 136 Returns the manifest as unparsed bytes.
137 137 """
138 138
139 139 def debugwireargs(one, two, three=None, four=None, five=None):
140 140 """Used to facilitate debugging of arguments passed over the wire."""
141 141
142 142 def getbundle(source, **kwargs):
143 143 """Obtain remote repository data as a bundle.
144 144
145 145 This command is how the bulk of repository data is transferred from
146 146 the peer to the local repository
147 147
148 148 Returns a generator of bundle data.
149 149 """
150 150
151 151 def heads():
152 152 """Determine all known head revisions in the peer.
153 153
154 154 Returns an iterable of binary nodes.
155 155 """
156 156
157 157 def known(nodes):
158 158 """Determine whether multiple nodes are known.
159 159
160 160 Accepts an iterable of nodes whose presence to check for.
161 161
162 162 Returns an iterable of booleans indicating whether the corresponding node
163 163 at that index is known to the peer.
164 164 """
165 165
166 166 def listkeys(namespace):
167 167 """Obtain all keys in a pushkey namespace.
168 168
169 169 Returns an iterable of key names.
170 170 """
171 171
172 172 def lookup(key):
173 173 """Resolve a value to a known revision.
174 174
175 175 Returns a binary node of the resolved revision on success.
176 176 """
177 177
178 178 def pushkey(namespace, key, old, new):
179 179 """Set a value using the ``pushkey`` protocol.
180 180
181 181 Arguments correspond to the pushkey namespace and key to operate on and
182 182 the old and new values for that key.
183 183
184 184 Returns a string with the peer result. The value inside varies by the
185 185 namespace.
186 186 """
187 187
188 188 def stream_out():
189 189 """Obtain streaming clone data.
190 190
191 191 Successful result should be a generator of data chunks.
192 192 """
193 193
194 194 def unbundle(bundle, heads, url):
195 195 """Transfer repository data to the peer.
196 196
197 197 This is how the bulk of data during a push is transferred.
198 198
199 199 Returns the integer number of heads added to the peer.
200 200 """
201 201
202 202 class ipeerlegacycommands(interfaceutil.Interface):
203 203 """Interface for implementing support for legacy wire protocol commands.
204 204
205 205 Wire protocol commands transition to legacy status when they are no longer
206 206 used by modern clients. To facilitate identifying which commands are
207 207 legacy, the interfaces are split.
208 208 """
209 209
210 210 def between(pairs):
211 211 """Obtain nodes between pairs of nodes.
212 212
213 213 ``pairs`` is an iterable of node pairs.
214 214
215 215 Returns an iterable of iterables of nodes corresponding to each
216 216 requested pair.
217 217 """
218 218
219 219 def branches(nodes):
220 220 """Obtain ancestor changesets of specific nodes back to a branch point.
221 221
222 222 For each requested node, the peer finds the first ancestor node that is
223 223 a DAG root or is a merge.
224 224
225 225 Returns an iterable of iterables with the resolved values for each node.
226 226 """
227 227
228 228 def changegroup(nodes, source):
229 229 """Obtain a changegroup with data for descendants of specified nodes."""
230 230
231 231 def changegroupsubset(bases, heads, source):
232 232 pass
233 233
234 234 class ipeercommandexecutor(interfaceutil.Interface):
235 235 """Represents a mechanism to execute remote commands.
236 236
237 237 This is the primary interface for requesting that wire protocol commands
238 238 be executed. Instances of this interface are active in a context manager
239 239 and have a well-defined lifetime. When the context manager exits, all
240 240 outstanding requests are waited on.
241 241 """
242 242
243 243 def callcommand(name, args):
244 244 """Request that a named command be executed.
245 245
246 246 Receives the command name and a dictionary of command arguments.
247 247
248 248 Returns a ``concurrent.futures.Future`` that will resolve to the
249 249 result of that command request. That exact value is left up to
250 250 the implementation and possibly varies by command.
251 251
252 252 Not all commands can coexist with other commands in an executor
253 253 instance: it depends on the underlying wire protocol transport being
254 254 used and the command itself.
255 255
256 256 Implementations MAY call ``sendcommands()`` automatically if the
257 257 requested command cannot coexist with other commands in this executor.
258 258
259 259 Implementations MAY call ``sendcommands()`` automatically when the
260 260 future's ``result()`` is called. So, consumers using multiple
261 261 commands with an executor MUST ensure that ``result()`` is not called
262 262 until all command requests have been issued.
263 263 """
264 264
265 265 def sendcommands():
266 266 """Trigger submission of queued command requests.
267 267
268 268 Not all transports submit commands as soon as they are requested to
269 269 run. When called, this method forces queued command requests to be
270 270 issued. It will no-op if all commands have already been sent.
271 271
272 272 When called, no more new commands may be issued with this executor.
273 273 """
274 274
275 275 def close():
276 276 """Signal that this command request is finished.
277 277
278 278 When called, no more new commands may be issued. All outstanding
279 279 commands that have previously been issued are waited on before
280 280 returning. This not only includes waiting for the futures to resolve,
281 281 but also waiting for all response data to arrive. In other words,
282 282 calling this waits for all on-wire state for issued command requests
283 283 to finish.
284 284
285 285 When used as a context manager, this method is called when exiting the
286 286 context manager.
287 287
288 288 This method may call ``sendcommands()`` if there are buffered commands.
289 289 """
290 290
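A hedged usage sketch of this executor contract: all commands for a round trip are issued before any ``result()`` call, since resolving a future may implicitly trigger ``sendcommands()``:

def fetchbranchmapandheads(peer):
    # ``peer`` is assumed to provide ipeerrequests.
    with peer.commandexecutor() as e:
        fbranchmap = e.callcommand('branchmap', {})
        fheads = e.callcommand('heads', {})
        # Only resolve futures once every command for this round trip has
        # been issued.
        return fbranchmap.result(), fheads.result()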
291 291 class ipeerrequests(interfaceutil.Interface):
292 292 """Interface for executing commands on a peer."""
293 293
294 294 limitedarguments = interfaceutil.Attribute(
295 295 """True if the peer cannot receive large argument value for commands."""
296 296 )
297 297
298 298 def commandexecutor():
299 299 """A context manager that resolves to an ipeercommandexecutor.
300 300
301 301 The object this resolves to can be used to issue command requests
302 302 to the peer.
303 303
304 304 Callers should call its ``callcommand`` method to issue command
305 305 requests.
306 306
307 307 A new executor should be obtained for each distinct set of commands
308 308 (possibly just a single command) that the consumer wants to execute
309 309 as part of a single operation or round trip. This is because some
310 310 peers are half-duplex and/or don't support persistent connections.
311 311 e.g. in the case of HTTP peers, commands sent to an executor represent
312 312 a single HTTP request. While some peers may support multiple command
313 313 sends over the wire per executor, consumers need to code to the least
314 314 capable peer. So it should be assumed that command executors buffer
315 315 called commands until they are told to send them and that each
316 316 command executor could result in a new connection or wire-level request
317 317 being issued.
318 318 """
319 319
320 320 class ipeerbase(ipeerconnection, ipeercapabilities, ipeerrequests):
321 321 """Unified interface for peer repositories.
322 322
323 323 All peer instances must conform to this interface.
324 324 """
325 325
326 326 class ipeerv2(ipeerconnection, ipeercapabilities, ipeerrequests):
327 327 """Unified peer interface for wire protocol version 2 peers."""
328 328
329 329 apidescriptor = interfaceutil.Attribute(
330 330 """Data structure holding description of server API.""")
331 331
332 332 @interfaceutil.implementer(ipeerbase)
333 333 class peer(object):
334 334 """Base class for peer repositories."""
335 335
336 336 limitedarguments = False
337 337
338 338 def capable(self, name):
339 339 caps = self.capabilities()
340 340 if name in caps:
341 341 return True
342 342
343 343 name = '%s=' % name
344 344 for cap in caps:
345 345 if cap.startswith(name):
346 346 return cap[len(name):]
347 347
348 348 return False
349 349
350 350 def requirecap(self, name, purpose):
351 351 if self.capable(name):
352 352 return
353 353
354 354 raise error.CapabilityError(
355 355 _('cannot %s; remote repository does not support the '
356 356 '\'%s\' capability') % (purpose, name))
357 357
358 358 class iverifyproblem(interfaceutil.Interface):
359 359 """Represents a problem with the integrity of the repository.
360 360
361 361 Instances of this interface are emitted to describe an integrity issue
362 362 with a repository (e.g. corrupt storage, missing data, etc).
363 363
364 364 Instances are essentially messages associated with severity.
365 365 """
366 366 warning = interfaceutil.Attribute(
367 367 """Message indicating a non-fatal problem.""")
368 368
369 369 error = interfaceutil.Attribute(
370 370 """Message indicating a fatal problem.""")
371 371
372 372 node = interfaceutil.Attribute(
373 373 """Revision encountering the problem.
374 374
375 375 ``None`` means the problem doesn't apply to a single revision.
376 376 """)
377 377
378 378 class irevisiondelta(interfaceutil.Interface):
379 379 """Represents a delta between one revision and another.
380 380
381 381 Instances convey enough information to allow a revision to be exchanged
382 382 with another repository.
383 383
384 384 Instances represent the fulltext revision data or a delta against
385 385 another revision. Therefore the ``revision`` and ``delta`` attributes
386 386 are mutually exclusive.
387 387
388 388 Typically used for changegroup generation.
389 389 """
390 390
391 391 node = interfaceutil.Attribute(
392 392 """20 byte node of this revision.""")
393 393
394 394 p1node = interfaceutil.Attribute(
395 395 """20 byte node of 1st parent of this revision.""")
396 396
397 397 p2node = interfaceutil.Attribute(
398 398 """20 byte node of 2nd parent of this revision.""")
399 399
400 400 linknode = interfaceutil.Attribute(
401 401 """20 byte node of the changelog revision this node is linked to.""")
402 402
403 403 flags = interfaceutil.Attribute(
404 404 """2 bytes of integer flags that apply to this revision.
405 405
406 406 This is a bitwise composition of the ``REVISION_FLAG_*`` constants.
407 407 """)
408 408
409 409 basenode = interfaceutil.Attribute(
410 410 """20 byte node of the revision this data is a delta against.
411 411
412 412 ``nullid`` indicates that the revision is a full revision and not
413 413 a delta.
414 414 """)
415 415
416 416 baserevisionsize = interfaceutil.Attribute(
417 417 """Size of base revision this delta is against.
418 418
419 419 May be ``None`` if ``basenode`` is ``nullid``.
420 420 """)
421 421
422 422 revision = interfaceutil.Attribute(
423 423 """Raw fulltext of revision data for this node.""")
424 424
425 425 delta = interfaceutil.Attribute(
426 426 """Delta between ``basenode`` and ``node``.
427 427
428 428 Stored in the bdiff delta format.
429 429 """)
430 430
431 431 class ifilerevisionssequence(interfaceutil.Interface):
432 432 """Contains index data for all revisions of a file.
433 433
434 434 Types implementing this behave like lists of tuples. The index
435 435 in the list corresponds to the revision number. The values contain
436 436 index metadata.
437 437
438 438 The *null* revision (revision number -1) is always the last item
439 439 in the index.
440 440 """
441 441
442 442 def __len__():
443 443 """The total number of revisions."""
444 444
445 445 def __getitem__(rev):
446 446 """Returns the object having a specific revision number.
447 447
448 448 Returns an 8-tuple with the following fields:
449 449
450 450 offset+flags
451 451 Contains the offset and flags for the revision. 64-bit unsigned
452 452 integer where first 6 bytes are the offset and the next 2 bytes
453 453 are flags. The offset can be 0 if it is not used by the store.
454 454 compressed size
455 455 Size of the revision data in the store. It can be 0 if it isn't
456 456 needed by the store.
457 457 uncompressed size
458 458 Fulltext size. It can be 0 if it isn't needed by the store.
459 459 base revision
460 460 Revision number of revision the delta for storage is encoded
461 461 against. -1 indicates not encoded against a base revision.
462 462 link revision
463 463 Revision number of changelog revision this entry is related to.
464 464 p1 revision
465 465 Revision number of 1st parent. -1 if no 1st parent.
466 466 p2 revision
467 467 Revision number of 2nd parent. -1 if no 2nd parent.
468 468 node
469 469 Binary node value for this revision number.
470 470
471 471 Negative values should index off the end of the sequence. ``-1``
472 472 should return the null revision. ``-2`` should return the most
473 473 recent revision.
474 474 """
475 475
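The packed ``offset+flags`` field described above can be split with simple shifts; a small illustrative sketch (helper names are not part of this module):

def unpackoffsetflags(value):
    # Upper six bytes are the offset, lower two bytes are the flags.
    return value >> 16, value & 0xFFFF

def packoffsetflags(offset, flags):
    return (offset << 16) | flags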
476 476 def __contains__(rev):
477 477 """Whether a revision number exists."""
478 478
479 479 def insert(self, i, entry):
480 480 """Add an item to the index at specific revision."""
481 481
482 482 class ifileindex(interfaceutil.Interface):
483 483 """Storage interface for index data of a single file.
484 484
485 485 File storage data is divided into index metadata and data storage.
486 486 This interface defines the index portion of the interface.
487 487
488 488 The index logically consists of:
489 489
490 490 * A mapping between revision numbers and nodes.
491 491 * DAG data (storing and querying the relationship between nodes).
492 492 * Metadata to facilitate storage.
493 493 """
494 494 def __len__():
495 495 """Obtain the number of revisions stored for this file."""
496 496
497 497 def __iter__():
498 498 """Iterate over revision numbers for this file."""
499 499
500 500 def hasnode(node):
501 501 """Returns a bool indicating if a node is known to this store.
502 502
503 503 Implementations must only return True for full, binary node values:
504 504 hex nodes, revision numbers, and partial node matches must be
505 505 rejected.
506 506
507 507 The null node is never present.
508 508 """
509 509
510 510 def revs(start=0, stop=None):
511 511 """Iterate over revision numbers for this file, with control."""
512 512
513 513 def parents(node):
514 514 """Returns a 2-tuple of parent nodes for a revision.
515 515
516 516 Values will be ``nullid`` if the parent is empty.
517 517 """
518 518
519 519 def parentrevs(rev):
520 520 """Like parents() but operates on revision numbers."""
521 521
522 522 def rev(node):
523 523 """Obtain the revision number given a node.
524 524
525 525 Raises ``error.LookupError`` if the node is not known.
526 526 """
527 527
528 528 def node(rev):
529 529 """Obtain the node value given a revision number.
530 530
531 531 Raises ``IndexError`` if the revision is not known.
532 532 """
533 533
534 534 def lookup(node):
535 535 """Attempt to resolve a value to a node.
536 536
537 537 Value can be a binary node, hex node, revision number, or a string
538 538 that can be converted to an integer.
539 539
540 540 Raises ``error.LookupError`` if a node could not be resolved.
541 541 """
542 542
543 543 def linkrev(rev):
544 544 """Obtain the changeset revision number a revision is linked to."""
545 545
546 546 def iscensored(rev):
547 547 """Return whether a revision's content has been censored."""
548 548
549 549 def commonancestorsheads(node1, node2):
550 550 """Obtain an iterable of nodes containing heads of common ancestors.
551 551
552 552 See ``ancestor.commonancestorsheads()``.
553 553 """
554 554
555 555 def descendants(revs):
556 556 """Obtain descendant revision numbers for a set of revision numbers.
557 557
558 558 If ``nullrev`` is in the set, this is equivalent to ``revs()``.
559 559 """
560 560
561 561 def heads(start=None, stop=None):
562 562 """Obtain a list of nodes that are DAG heads, with control.
563 563
564 564 The set of revisions examined can be limited by specifying
565 565 ``start`` and ``stop``. ``start`` is a node. ``stop`` is an
566 566 iterable of nodes. DAG traversal starts at earlier revision
567 567 ``start`` and iterates forward until any node in ``stop`` is
568 568 encountered.
569 569 """
570 570
571 571 def children(node):
572 572 """Obtain nodes that are children of a node.
573 573
574 574 Returns a list of nodes.
575 575 """
576 576
577 577 class ifiledata(interfaceutil.Interface):
578 578 """Storage interface for data storage of a specific file.
579 579
580 580 This complements ``ifileindex`` and provides an interface for accessing
581 581 data for a tracked file.
582 582 """
583 583 def size(rev):
584 584 """Obtain the fulltext size of file data.
585 585
586 586 Any metadata is excluded from size measurements.
587 587 """
588 588
589 589 def revision(node, raw=False):
590 590 """"Obtain fulltext data for a node.
591 591
592 592 By default, any storage transformations are applied before the data
593 593 is returned. If ``raw`` is True, non-raw storage transformations
594 594 are not applied.
595 595
596 596 The fulltext data may contain a header containing metadata. Most
597 597 consumers should use ``read()`` to obtain the actual file data.
598 598 """
599 599
600 600 def rawdata(node):
601 601 """Obtain raw data for a node.
602 602 """
603 603
604 604 def read(node):
605 605 """Resolve file fulltext data.
606 606
607 607 This is similar to ``revision()`` except any metadata in the data
608 608 headers is stripped.
609 609 """
610 610
611 611 def renamed(node):
612 612 """Obtain copy metadata for a node.
613 613
614 614 Returns ``False`` if no copy metadata is stored or a 2-tuple of
615 615 (path, node) from which this revision was copied.
616 616 """
617 617
618 618 def cmp(node, fulltext):
619 619 """Compare fulltext to another revision.
620 620
621 621 Returns True if the fulltext is different from what is stored.
622 622
623 623 This takes copy metadata into account.
624 624
625 625 TODO better document the copy metadata and censoring logic.
626 626 """
627 627
628 628 def emitrevisions(nodes,
629 629 nodesorder=None,
630 630 revisiondata=False,
631 631 assumehaveparentrevisions=False,
632 632 deltamode=CG_DELTAMODE_STD):
633 633 """Produce ``irevisiondelta`` for revisions.
634 634
635 635 Given an iterable of nodes, emits objects conforming to the
636 636 ``irevisiondelta`` interface that describe revisions in storage.
637 637
638 638 This method is a generator.
639 639
640 640 The input nodes may be unordered. Implementations must ensure that a
641 641 node's parents are emitted before the node itself. Transitively, this
642 642 means that a node may only be emitted once all its ancestors in
643 643 ``nodes`` have also been emitted.
644 644
645 645 By default, emits "index" data (the ``node``, ``p1node``, and
646 646 ``p2node`` attributes). If ``revisiondata`` is set, revision data
647 647 will also be present on the emitted objects.
648 648
649 649 With default argument values, implementations can choose to emit
650 650 either fulltext revision data or a delta. When emitting deltas,
651 651 implementations must consider whether the delta's base revision
652 652 fulltext is available to the receiver.
653 653
654 654 The base revision fulltext is guaranteed to be available if any of
655 655 the following are met:
656 656
657 657 * Its fulltext revision was emitted by this method call.
658 658 * A delta for that revision was emitted by this method call.
659 659 * ``assumehaveparentrevisions`` is True and the base revision is a
660 660 parent of the node.
661 661
662 662 ``nodesorder`` can be used to control the order that revisions are
663 663 emitted. By default, revisions can be reordered as long as they are
664 664 in DAG topological order (see above). If the value is ``nodes``,
665 665 the iteration order from ``nodes`` should be used. If the value is
666 666 ``storage``, then the native order from the backing storage layer
667 667 is used. (Not all storage layers will have strong ordering and behavior
668 668 of this mode is storage-dependent.) ``nodes`` ordering can force
669 669 revisions to be emitted before their ancestors, so consumers should
670 670 use it with care.
671 671
672 672 The ``linknode`` attribute on the returned ``irevisiondelta`` may not
673 673 be set and it is the caller's responsibility to resolve it, if needed.
674 674
675 675 If ``deltamode`` is CG_DELTAMODE_PREV and revision data is requested,
676 676 all revision data should be emitted as deltas against the revision
677 677 emitted just prior. The initial revision should be a delta against its
678 678 1st parent.
679 679 """
680 680
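A hedged consumer sketch for ``emitrevisions()``: reconstruct fulltexts from the emitted objects, assuming deltas are applied with ``mdiff.patch()`` and relying on the base-availability guarantees above:

from mercurial import mdiff

def collectfulltexts(store, nodes):
    # ``store`` is assumed to provide ifiledata.
    fulltexts = {}
    for rev in store.emitrevisions(nodes, revisiondata=True):
        if rev.revision is not None:
            fulltexts[rev.node] = rev.revision
        else:
            # The base is available per the guarantees above, either as an
            # emitted fulltext or as an already reconstructed delta.
            base = fulltexts[rev.basenode]
            fulltexts[rev.node] = mdiff.patch(base, rev.delta)
    return fulltexts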
681 681 class ifilemutation(interfaceutil.Interface):
682 682 """Storage interface for mutation events of a tracked file."""
683 683
684 684 def add(filedata, meta, transaction, linkrev, p1, p2):
685 685 """Add a new revision to the store.
686 686
687 687 Takes file data, dictionary of metadata, a transaction, linkrev,
688 688 and parent nodes.
689 689
690 690 Returns the node that was added.
691 691
692 692 May no-op if a revision matching the supplied data is already stored.
693 693 """
694 694
695 695 def addrevision(revisiondata, transaction, linkrev, p1, p2, node=None,
696 696 flags=0, cachedelta=None):
697 697 """Add a new revision to the store.
698 698
699 699 This is similar to ``add()`` except it operates at a lower level.
700 700
701 701 The data passed in already contains a metadata header, if any.
702 702
703 703 ``node`` and ``flags`` can be used to define the expected node and
704 704 the flags to use with storage. ``flags`` is a bitwise value composed
705 705 of the various ``REVISION_FLAG_*`` constants.
706 706
707 707 ``add()`` is usually called when adding files from e.g. the working
708 708 directory. ``addrevision()`` is often called by ``add()`` and for
709 709 scenarios where revision data has already been computed, such as when
710 710 applying raw data from a peer repo.
711 711 """
712 712
713 713 def addgroup(deltas, linkmapper, transaction, addrevisioncb=None,
714 714 maybemissingparents=False):
715 715 """Process a series of deltas for storage.
716 716
717 717 ``deltas`` is an iterable of 7-tuples of
718 718 (node, p1, p2, linknode, deltabase, delta, flags) defining revisions
719 719 to add.
720 720
721 721 The ``delta`` field contains ``mpatch`` data to apply to a base
722 722 revision, identified by ``deltabase``. The base node can be
723 723 ``nullid``, in which case the header from the delta can be ignored
724 724 and the delta used as the fulltext.
725 725
726 726 ``addrevisioncb`` should be called for each node as it is committed.
727 727
728 728 ``maybemissingparents`` is a bool indicating whether the incoming
729 729 data may reference parents/ancestor revisions that aren't present.
730 730 This flag is set when receiving data into a "shallow" store that
731 731 doesn't hold all history.
732 732
733 733 Returns a list of nodes that were processed. A node will be in the list
734 734 even if it existed in the store previously.
735 735 """
736 736
737 737 def censorrevision(tr, node, tombstone=b''):
738 738 """Remove the content of a single revision.
739 739
740 740 The specified ``node`` will have its content purged from storage.
741 741 Future attempts to access the revision data for this node will
742 742 result in failure.
743 743
744 744 A ``tombstone`` message can optionally be stored. This message may be
745 745 displayed to users when they attempt to access the missing revision
746 746 data.
747 747
748 748 Storage backends may have stored deltas against the previous content
749 749 in this revision. As part of censoring a revision, these storage
750 750 backends are expected to rewrite any internally stored deltas such
751 751 that they no longer reference the deleted content.
752 752 """
753 753
754 754 def getstrippoint(minlink):
755 755 """Find the minimum revision that must be stripped to strip a linkrev.
756 756
757 757 Returns a 2-tuple containing the minimum revision number and a set
758 758 of all revisions numbers that would be broken by this strip.
759 759
760 760 TODO this is highly revlog centric and should be abstracted into
761 761 a higher-level deletion API. ``repair.strip()`` relies on this.
762 762 """
763 763
764 764 def strip(minlink, transaction):
765 765 """Remove storage of items starting at a linkrev.
766 766
767 767 This uses ``getstrippoint()`` to determine the first node to remove.
768 768 Then it effectively truncates storage for all revisions after that.
769 769
770 770 TODO this is highly revlog centric and should be abstracted into a
771 771 higher-level deletion API.
772 772 """
773 773
774 774 class ifilestorage(ifileindex, ifiledata, ifilemutation):
775 775 """Complete storage interface for a single tracked file."""
776 776
777 777 def files():
778 778 """Obtain paths that are backing storage for this file.
779 779
780 780 TODO this is used heavily by verify code and there should probably
781 781 be a better API for that.
782 782 """
783 783
784 784 def storageinfo(exclusivefiles=False, sharedfiles=False,
785 785 revisionscount=False, trackedsize=False,
786 786 storedsize=False):
787 787 """Obtain information about storage for this file's data.
788 788
789 789 Returns a dict describing storage for this tracked path. The keys
790 790 in the dict map to arguments of the same. The arguments are bools
791 791 indicating whether to calculate and obtain that data.
792 792
793 793 exclusivefiles
794 794 Iterable of (vfs, path) describing files that are exclusively
795 795 used to back storage for this tracked path.
796 796
797 797 sharedfiles
798 798 Iterable of (vfs, path) describing files that are used to back
799 799 storage for this tracked path. Those files may also provide storage
800 800 for other stored entities.
801 801
802 802 revisionscount
803 803 Number of revisions available for retrieval.
804 804
805 805 trackedsize
806 806 Total size in bytes of all tracked revisions. This is a sum of the
807 807 length of the fulltext of all revisions.
808 808
809 809 storedsize
810 810 Total size in bytes used to store data for all tracked revisions.
811 811 This is commonly less than ``trackedsize`` due to internal usage
812 812 of deltas rather than fulltext revisions.
813 813
814 814 Not all storage backends may support all queries or have a reasonable
815 815 value to use. In that case, the value should be set to ``None`` and
816 816 callers are expected to handle this special value.
817 817 """
818 818
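A hedged usage sketch: request only the data needed and tolerate ``None`` for queries a backend does not support, per the contract above:

def reporttrackedsize(fstore):
    # ``fstore`` is assumed to provide ifilestorage.
    info = fstore.storageinfo(revisionscount=True, trackedsize=True)
    count = info.get('revisionscount')
    size = info.get('trackedsize')
    if count is None or size is None:
        return 'size information unavailable'
    return '%d revisions, %d bytes tracked' % (count, size)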
819 819 def verifyintegrity(state):
820 820 """Verifies the integrity of file storage.
821 821
822 822 ``state`` is a dict holding state of the verifier process. It can be
823 823 used to communicate data between invocations of multiple storage
824 824 primitives.
825 825
826 826 If individual revisions cannot have their revision content resolved,
827 827 the method is expected to set the ``skipread`` key to a set of nodes
828 828 that encountered problems.
829 829
830 830 The method yields objects conforming to the ``iverifyproblem``
831 831 interface.
832 832 """
833 833
834 834 class idirs(interfaceutil.Interface):
835 835 """Interface representing a collection of directories from paths.
836 836
837 837 This interface is essentially a derived data structure representing
838 838 directories from a collection of paths.
839 839 """
840 840
841 841 def addpath(path):
842 842 """Add a path to the collection.
843 843
844 844 All directories in the path will be added to the collection.
845 845 """
846 846
847 847 def delpath(path):
848 848 """Remove a path from the collection.
849 849
850 850 If the removal was the last path in a particular directory, the
851 851 directory is removed from the collection.
852 852 """
853 853
854 854 def __iter__():
855 855 """Iterate over the directories in this collection of paths."""
856 856
857 857 def __contains__(path):
858 858 """Whether a specific directory is in this collection."""
859 859
860 860 class imanifestdict(interfaceutil.Interface):
861 861 """Interface representing a manifest data structure.
862 862
863 863 A manifest is effectively a dict mapping paths to entries. Each entry
864 864 consists of a binary node and extra flags affecting that entry.
865 865 """
866 866
867 867 def __getitem__(path):
868 868 """Returns the binary node value for a path in the manifest.
869 869
870 870 Raises ``KeyError`` if the path does not exist in the manifest.
871 871
872 872 Equivalent to ``self.find(path)[0]``.
873 873 """
874 874
875 875 def find(path):
876 876 """Returns the entry for a path in the manifest.
877 877
878 878 Returns a 2-tuple of (node, flags).
879 879
880 880 Raises ``KeyError`` if the path does not exist in the manifest.
881 881 """
882 882
883 883 def __len__():
884 884 """Return the number of entries in the manifest."""
885 885
886 886 def __nonzero__():
887 887 """Returns True if the manifest has entries, False otherwise."""
888 888
889 889 __bool__ = __nonzero__
890 890
891 891 def __setitem__(path, node):
892 892 """Define the node value for a path in the manifest.
893 893
894 894 If the path is already in the manifest, its flags will be copied to
895 895 the new entry.
896 896 """
897 897
898 898 def __contains__(path):
899 899 """Whether a path exists in the manifest."""
900 900
901 901 def __delitem__(path):
902 902 """Remove a path from the manifest.
903 903
904 904 Raises ``KeyError`` if the path is not in the manifest.
905 905 """
906 906
907 907 def __iter__():
908 908 """Iterate over paths in the manifest."""
909 909
910 910 def iterkeys():
911 911 """Iterate over paths in the manifest."""
912 912
913 913 def keys():
914 914 """Obtain a list of paths in the manifest."""
915 915
916 916 def filesnotin(other, match=None):
917 917 """Obtain the set of paths in this manifest but not in another.
918 918
919 919 ``match`` is an optional matcher function to be applied to both
920 920 manifests.
921 921
922 922 Returns a set of paths.
923 923 """
924 924
925 925 def dirs():
926 926 """Returns an object implementing the ``idirs`` interface."""
927 927
928 928 def hasdir(dir):
929 929 """Returns a bool indicating if a directory is in this manifest."""
930 930
931 931 def matches(match):
932 932 """Generate a new manifest filtered through a matcher.
933 933
934 934 Returns an object conforming to the ``imanifestdict`` interface.
935 935 """
936 936
937 937 def walk(match):
938 938 """Generator of paths in manifest satisfying a matcher.
939 939
940 940 This is equivalent to ``self.matches(match).iterkeys()`` except a new
941 941 manifest object is not created.
942 942
943 943 If the matcher has explicit files listed and they don't exist in
944 944 the manifest, ``match.bad()`` is called for each missing file.
945 945 """
946 946
947 947 def diff(other, match=None, clean=False):
948 948 """Find differences between this manifest and another.
949 949
950 950 This manifest is compared to ``other``.
951 951
952 952 If ``match`` is provided, the two manifests are filtered against this
953 953 matcher and only entries satisfying the matcher are compared.
954 954
955 955 If ``clean`` is True, unchanged files are included in the returned
956 956 object.
957 957
958 958 Returns a dict with paths as keys and values of 2-tuples of 2-tuples of
959 959 the form ``((node1, flag1), (node2, flag2))`` where ``(node1, flag1)``
960 960 represents the node and flags for this manifest and ``(node2, flag2)``
961 961 are the same for the other manifest.
962 962 """
963 963
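An illustrative consumer of the ``diff()`` result shape described above, assuming (as in the revlog-backed implementation, not stated by this interface) that a path missing on one side carries a ``None`` node for that side:

def summarizediff(m1, m2):
    changes = []
    for path, ((node1, _flag1), (node2, _flag2)) in sorted(m1.diff(m2).items()):
        if node1 is None:
            status = 'only in other'
        elif node2 is None:
            status = 'only in this manifest'
        else:
            status = 'modified'
        changes.append((path, status))
    return changes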
964 964 def setflag(path, flag):
965 965 """Set the flag value for a given path.
966 966
967 967 Raises ``KeyError`` if the path is not already in the manifest.
968 968 """
969 969
970 970 def get(path, default=None):
971 971 """Obtain the node value for a path or a default value if missing."""
972 972
973 973 def flags(path, default=''):
974 974 """Return the flags value for a path or a default value if missing."""
975 975
976 976 def copy():
977 977 """Return a copy of this manifest."""
978 978
979 979 def items():
980 980 """Returns an iterable of (path, node) for items in this manifest."""
981 981
982 982 def iteritems():
983 983 """Identical to items()."""
984 984
985 985 def iterentries():
986 986 """Returns an iterable of (path, node, flags) for this manifest.
987 987
988 988 Similar to ``iteritems()`` except items are a 3-tuple and include
989 989 flags.
990 990 """
991 991
992 992 def text():
993 993 """Obtain the raw data representation for this manifest.
994 994
995 995 Result is used to create a manifest revision.
996 996 """
997 997
998 998 def fastdelta(base, changes):
999 999 """Obtain a delta between this manifest and another given changes.
1000 1000
1001 1001 ``base`` is the raw data representation for another manifest.
1002 1002
1003 1003 ``changes`` is an iterable of ``(path, to_delete)``.
1004 1004
1005 1005 Returns a 2-tuple containing ``bytearray(self.text())`` and the
1006 1006 delta between ``base`` and this manifest.
1007 1007 """
1008 1008
1009 1009 class imanifestrevisionbase(interfaceutil.Interface):
1010 1010 """Base interface representing a single revision of a manifest.
1011 1011
1012 1012 Should not be used as a primary interface: should always be inherited
1013 1013 as part of a larger interface.
1014 1014 """
1015 1015
1016 1016 def new():
1017 1017 """Obtain a new manifest instance.
1018 1018
1019 1019 Returns an object conforming to the ``imanifestrevisionwritable``
1020 1020 interface. The instance will be associated with the same
1021 1021 ``imanifestlog`` collection as this instance.
1022 1022 """
1023 1023
1024 1024 def copy():
1025 1025 """Obtain a copy of this manifest instance.
1026 1026
1027 1027 Returns an object conforming to the ``imanifestrevisionwritable``
1028 1028 interface. The instance will be associated with the same
1029 1029 ``imanifestlog`` collection as this instance.
1030 1030 """
1031 1031
1032 1032 def read():
1033 1033 """Obtain the parsed manifest data structure.
1034 1034
1035 1035 The returned object conforms to the ``imanifestdict`` interface.
1036 1036 """
1037 1037
1038 1038 class imanifestrevisionstored(imanifestrevisionbase):
1039 1039 """Interface representing a manifest revision committed to storage."""
1040 1040
1041 1041 def node():
1042 1042 """The binary node for this manifest."""
1043 1043
1044 1044 parents = interfaceutil.Attribute(
1045 1045 """List of binary nodes that are parents for this manifest revision."""
1046 1046 )
1047 1047
1048 1048 def readdelta(shallow=False):
1049 1049 """Obtain the manifest data structure representing changes from parent.
1050 1050
1051 1051 This manifest is compared to its 1st parent. A new manifest representing
1052 1052 those differences is constructed.
1053 1053
1054 1054 The returned object conforms to the ``imanifestdict`` interface.
1055 1055 """
1056 1056
1057 1057 def readfast(shallow=False):
1058 1058 """Calls either ``read()`` or ``readdelta()``.
1059 1059
1060 1060 The faster of the two options is called.
1061 1061 """
1062 1062
1063 1063 def find(key):
1064 1064 """Calls self.read().find(key)``.
1065 1065
1066 1066 Returns a 2-tuple of ``(node, flags)`` or raises ``KeyError``.
1067 1067 """
1068 1068
1069 1069 class imanifestrevisionwritable(imanifestrevisionbase):
1070 1070 """Interface representing a manifest revision that can be committed."""
1071 1071
1072 1072 def write(transaction, linkrev, p1node, p2node, added, removed, match=None):
1073 1073 """Add this revision to storage.
1074 1074
1075 1075 Takes a transaction object, the changeset revision number it will
1076 1076 be associated with, its parent nodes, and lists of added and
1077 1077 removed paths.
1078 1078
1079 1079 If match is provided, storage can choose not to inspect or write out
1080 1080 items that do not match. Storage is still required to be able to provide
1081 1081 the full manifest in the future for any directories written (these
1082 1082 manifests should not be "narrowed on disk").
1083 1083
1084 1084 Returns the binary node of the created revision.
1085 1085 """
1086 1086
1087 1087 class imanifeststorage(interfaceutil.Interface):
1088 1088 """Storage interface for manifest data."""
1089 1089
1090 1090 tree = interfaceutil.Attribute(
1091 1091 """The path to the directory this manifest tracks.
1092 1092
1093 1093 The empty bytestring represents the root manifest.
1094 1094 """)
1095 1095
1096 1096 index = interfaceutil.Attribute(
1097 1097 """An ``ifilerevisionssequence`` instance.""")
1098 1098
1099 1099 indexfile = interfaceutil.Attribute(
1100 1100 """Path of revlog index file.
1101 1101
1102 1102 TODO this is revlog specific and should not be exposed.
1103 1103 """)
1104 1104
1105 1105 opener = interfaceutil.Attribute(
1106 1106 """VFS opener to use to access underlying files used for storage.
1107 1107
1108 1108 TODO this is revlog specific and should not be exposed.
1109 1109 """)
1110 1110
1111 1111 version = interfaceutil.Attribute(
1112 1112 """Revlog version number.
1113 1113
1114 1114 TODO this is revlog specific and should not be exposed.
1115 1115 """)
1116 1116
1117 1117 _generaldelta = interfaceutil.Attribute(
1118 1118 """Whether generaldelta storage is being used.
1119 1119
1120 1120 TODO this is revlog specific and should not be exposed.
1121 1121 """)
1122 1122
1123 1123 fulltextcache = interfaceutil.Attribute(
1124 1124 """Dict with cache of fulltexts.
1125 1125
1126 1126 TODO this doesn't feel appropriate for the storage interface.
1127 1127 """)
1128 1128
1129 1129 def __len__():
1130 1130 """Obtain the number of revisions stored for this manifest."""
1131 1131
1132 1132 def __iter__():
1133 1133 """Iterate over revision numbers for this manifest."""
1134 1134
1135 1135 def rev(node):
1136 1136 """Obtain the revision number given a binary node.
1137 1137
1138 1138 Raises ``error.LookupError`` if the node is not known.
1139 1139 """
1140 1140
1141 1141 def node(rev):
1142 1142 """Obtain the node value given a revision number.
1143 1143
1144 1144 Raises ``error.LookupError`` if the revision is not known.
1145 1145 """
1146 1146
1147 1147 def lookup(value):
1148 1148 """Attempt to resolve a value to a node.
1149 1149
1150 1150 Value can be a binary node, hex node, revision number, or a bytes
1151 1151 that can be converted to an integer.
1152 1152
1153 1153 Raises ``error.LookupError`` if a node could not be resolved.
1154 1154 """
1155 1155
1156 1156 def parents(node):
1157 1157 """Returns a 2-tuple of parent nodes for a node.
1158 1158
1159 1159 Values will be ``nullid`` if the parent is empty.
1160 1160 """
1161 1161
1162 1162 def parentrevs(rev):
1163 1163 """Like parents() but operates on revision numbers."""
1164 1164
1165 1165 def linkrev(rev):
1166 1166 """Obtain the changeset revision number a revision is linked to."""
1167 1167
1168 1168 def revision(node, _df=None, raw=False):
1169 1169 """Obtain fulltext data for a node."""
1170 1170
1171 1171 def rawdata(node, _df=None):
1172 1172 """Obtain raw data for a node."""
1173 1173
1174 1174 def revdiff(rev1, rev2):
1175 1175 """Obtain a delta between two revision numbers.
1176 1176
1177 1177 The returned data is the result of ``bdiff.bdiff()`` on the raw
1178 1178 revision data.
1179 1179 """
1180 1180
1181 1181 def cmp(node, fulltext):
1182 1182 """Compare fulltext to another revision.
1183 1183
1184 1184 Returns True if the fulltext is different from what is stored.
1185 1185 """
1186 1186
1187 1187 def emitrevisions(nodes,
1188 1188 nodesorder=None,
1189 1189 revisiondata=False,
1190 1190 assumehaveparentrevisions=False):
1191 1191 """Produce ``irevisiondelta`` describing revisions.
1192 1192
1193 1193 See the documentation for ``ifiledata`` for more.
1194 1194 """
1195 1195
1196 1196 def addgroup(deltas, linkmapper, transaction, addrevisioncb=None):
1197 1197 """Process a series of deltas for storage.
1198 1198
1199 1199 See the documentation in ``ifilemutation`` for more.
1200 1200 """
1201 1201
1202 1202 def rawsize(rev):
1203 1203 """Obtain the size of tracked data.
1204 1204
1205 1205 Is equivalent to ``len(m.rawdata(node))``.
1206 1206
1207 1207 TODO this method is only used by upgrade code and may be removed.
1208 1208 """
1209 1209
1210 1210 def getstrippoint(minlink):
1211 1211 """Find minimum revision that must be stripped to strip a linkrev.
1212 1212
1213 1213 See the documentation in ``ifilemutation`` for more.
1214 1214 """
1215 1215
1216 1216 def strip(minlink, transaction):
1217 1217 """Remove storage of items starting at a linkrev.
1218 1218
1219 1219 See the documentation in ``ifilemutation`` for more.
1220 1220 """
1221 1221
1222 1222 def checksize():
1223 1223 """Obtain the expected sizes of backing files.
1224 1224
1225 1225 TODO this is used by verify and it should not be part of the interface.
1226 1226 """
1227 1227
1228 1228 def files():
1229 1229 """Obtain paths that are backing storage for this manifest.
1230 1230
1231 1231 TODO this is used by verify and there should probably be a better API
1232 1232 for this functionality.
1233 1233 """
1234 1234
1235 1235 def deltaparent(rev):
1236 1236 """Obtain the revision that a revision is delta'd against.
1237 1237
1238 1238 TODO delta encoding is an implementation detail of storage and should
1239 1239 not be exposed to the storage interface.
1240 1240 """
1241 1241
1242 1242 def clone(tr, dest, **kwargs):
1243 1243 """Clone this instance to another."""
1244 1244
1245 1245 def clearcaches(clear_persisted_data=False):
1246 1246 """Clear any caches associated with this instance."""
1247 1247
1248 1248 def dirlog(d):
1249 1249 """Obtain a manifest storage instance for a tree."""
1250 1250
1251 1251 def add(m, transaction, link, p1, p2, added, removed, readtree=None,
1252 1252 match=None):
1253 1253 """Add a revision to storage.
1254 1254
1255 1255 ``m`` is an object conforming to ``imanifestdict``.
1256 1256
1257 1257 ``link`` is the linkrev revision number.
1258 1258
1259 1259 ``p1`` and ``p2`` are the parent revision numbers.
1260 1260
1261 1261 ``added`` and ``removed`` are iterables of added and removed paths,
1262 1262 respectively.
1263 1263
1264 1264 ``readtree`` is a function that can be used to read the child tree(s)
1265 1265 when recursively writing the full tree structure when using
1266 1266 treemanifests.
1267 1267
1268 1268 ``match`` is a matcher that can be used to hint to storage that not all
1269 1269 paths must be inspected; this is an optimization and can be safely
1270 1270 ignored. Note that the storage must still be able to reproduce a full
1271 1271 manifest including files that did not match.
1272 1272 """
1273 1273
1274 1274 def storageinfo(exclusivefiles=False, sharedfiles=False,
1275 1275 revisionscount=False, trackedsize=False,
1276 1276 storedsize=False):
1277 1277 """Obtain information about storage for this manifest's data.
1278 1278
1279 1279 See ``ifilestorage.storageinfo()`` for a description of this method.
1280 1280 This one behaves the same way, except for manifest data.
1281 1281 """
1282 1282
1283 1283 class imanifestlog(interfaceutil.Interface):
1284 1284 """Interface representing a collection of manifest snapshots.
1285 1285
1286 1286 Represents the root manifest in a repository.
1287 1287
1288 1288 Also serves as a means to access nested tree manifests and to cache
1289 1289 tree manifests.
1290 1290 """
1291 1291
1292 1292 def __getitem__(node):
1293 1293 """Obtain a manifest instance for a given binary node.
1294 1294
1295 1295 Equivalent to calling ``self.get('', node)``.
1296 1296
1297 1297 The returned object conforms to the ``imanifestrevisionstored``
1298 1298 interface.
1299 1299 """
1300 1300
1301 1301 def get(tree, node, verify=True):
1302 1302 """Retrieve the manifest instance for a given directory and binary node.
1303 1303
1304 1304 ``node`` always refers to the node of the root manifest (which will be
1305 1305 the only manifest if flat manifests are being used).
1306 1306
1307 1307 If ``tree`` is the empty string, the root manifest is returned.
1308 1308 Otherwise the manifest for the specified directory will be returned
1309 1309 (requires tree manifests).
1310 1310
1311 1311 If ``verify`` is True, ``LookupError`` is raised if the node is not
1312 1312 known.
1313 1313
1314 1314 The returned object conforms to the ``imanifestrevisionstored``
1315 1315 interface.
1316 1316 """
1317 1317
1318 1318 def getstorage(tree):
1319 1319 """Retrieve an interface to storage for a particular tree.
1320 1320
1321 1321 If ``tree`` is the empty bytestring, storage for the root manifest will
1322 1322 be returned. Otherwise storage for a tree manifest is returned.
1323 1323
1324 1324 TODO formalize interface for returned object.
1325 1325 """
1326 1326
1327 1327 def clearcaches():
1328 1328 """Clear caches associated with this collection."""
1329 1329
1330 1330 def rev(node):
1331 1331 """Obtain the revision number for a binary node.
1332 1332
1333 1333 Raises ``error.LookupError`` if the node is not known.
1334 1334 """
1335 1335
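# Hedged illustration (not part of the original file): how a consumer might
# use an ``imanifestlog`` provider. ``repo`` is a hypothetical object whose
# ``manifestlog`` attribute conforms to this interface; ``node`` is a binary
# manifest node already known to the caller.
def _example_read_manifest(repo, node):
    ml = repo.manifestlog
    # ``ml[node]`` is equivalent to ``ml.get(b'', node)`` and returns an
    # object conforming to ``imanifestrevisionstored``.
    rootmanifest = ml[node]
    # With tree manifests, a sub-directory manifest can be requested; the
    # node still refers to the *root* manifest node.
    subdir = ml.get(b'dir/', node)
    return rootmanifest, subdir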
1336 1336 class ilocalrepositoryfilestorage(interfaceutil.Interface):
1337 1337 """Local repository sub-interface providing access to tracked file storage.
1338 1338
1339 1339 This interface defines how a repository accesses storage for a single
1340 1340 tracked file path.
1341 1341 """
1342 1342
1343 1343 def file(f):
1344 1344 """Obtain a filelog for a tracked path.
1345 1345
1346 1346 The returned type conforms to the ``ifilestorage`` interface.
1347 1347 """
1348 1348
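# Hedged illustration (not part of the original file): obtaining per-file
# storage from a repository providing ``ilocalrepositoryfilestorage``.
# ``repo`` and the path are hypothetical; the returned object conforms to
# ``ifilestorage``.
def _example_file_storage(repo):
    fl = repo.file(b'path/to/tracked-file.txt')
    # e.g. count the number of revisions stored for that path.
    return len(fl)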
1349 1349 class ilocalrepositorymain(interfaceutil.Interface):
1350 1350 """Main interface for local repositories.
1351 1351
1352 1352 This currently captures the reality of things - not how things should be.
1353 1353 """
1354 1354
1355 1355 supportedformats = interfaceutil.Attribute(
1356 1356 """Set of requirements that apply to stream clone.
1357 1357
1358 1358 This is actually a class attribute and is shared among all instances.
1359 1359 """)
1360 1360
1361 1361 supported = interfaceutil.Attribute(
1362 1362 """Set of requirements that this repo is capable of opening.""")
1363 1363
1364 1364 requirements = interfaceutil.Attribute(
1365 1365 """Set of requirements this repo uses.""")
1366 1366
1367 1367 features = interfaceutil.Attribute(
1368 1368 """Set of "features" this repository supports.
1369 1369
1370 1370 A "feature" is a loosely-defined term. It can refer to a feature
1371 1371 in the classical sense or can describe an implementation detail
1372 1372 of the repository. For example, a ``readonly`` feature may denote
1373 1373 the repository as read-only. Or a ``revlogfilestore`` feature may
1374 1374 denote that the repository is using revlogs for file storage.
1375 1375
1376 1376 The intent of features is to provide a machine-queryable mechanism
1377 1377 for repo consumers to test for various repository characteristics.
1378 1378
1379 1379 Features are similar to ``requirements``. The main difference is that
1380 1380 requirements are stored on-disk and represent requirements to open the
1381 1381 repository. Features describe run-time capabilities of the repository and
1382 1382 can be more granular (and may be derived from requirements).
1383 1383 """)
1384 1384
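# Hedged illustration (not part of the original file): features are meant to
# be queried at run time. The feature name below is the ``revlogfilestore``
# example from the docstring above; the concrete feature names actually used
# are defined elsewhere in this module and are an assumption here.
def _example_check_feature(repo):
    if b'revlogfilestore' in repo.features:
        # Safe to rely on revlog-backed file storage behaviour.
        return True
    return False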
1385 1385 filtername = interfaceutil.Attribute(
1386 1386 """Name of the repoview that is active on this repo.""")
1387 1387
1388 1388 wvfs = interfaceutil.Attribute(
1389 1389 """VFS used to access the working directory.""")
1390 1390
1391 1391 vfs = interfaceutil.Attribute(
1392 1392 """VFS rooted at the .hg directory.
1393 1393
1394 1394 Used to access repository data not in the store.
1395 1395 """)
1396 1396
1397 1397 svfs = interfaceutil.Attribute(
1398 1398 """VFS rooted at the store.
1399 1399
1400 1400 Used to access repository data in the store. Typically .hg/store.
1401 1401 But can point elsewhere if the store is shared.
1402 1402 """)
1403 1403
1404 1404 root = interfaceutil.Attribute(
1405 1405 """Path to the root of the working directory.""")
1406 1406
1407 1407 path = interfaceutil.Attribute(
1408 1408 """Path to the .hg directory.""")
1409 1409
1410 1410 origroot = interfaceutil.Attribute(
1411 1411 """The filesystem path that was used to construct the repo.""")
1412 1412
1413 1413 auditor = interfaceutil.Attribute(
1414 1414 """A pathauditor for the working directory.
1415 1415
1416 1416 This checks if a path refers to a nested repository.
1417 1417
1418 1418 Operates on the filesystem.
1419 1419 """)
1420 1420
1421 1421 nofsauditor = interfaceutil.Attribute(
1422 1422 """A pathauditor for the working directory.
1423 1423
1424 1424 This is like ``auditor`` except it doesn't do filesystem checks.
1425 1425 """)
1426 1426
1427 1427 baseui = interfaceutil.Attribute(
1428 1428 """Original ui instance passed into constructor.""")
1429 1429
1430 1430 ui = interfaceutil.Attribute(
1431 1431 """Main ui instance for this repository.""")
1432 1432
1433 1433 sharedpath = interfaceutil.Attribute(
1434 1434 """Path to the .hg directory of the repo this repo was shared from.""")
1435 1435
1436 1436 store = interfaceutil.Attribute(
1437 1437 """A store instance.""")
1438 1438
1439 1439 spath = interfaceutil.Attribute(
1440 1440 """Path to the store.""")
1441 1441
1442 1442 sjoin = interfaceutil.Attribute(
1443 1443 """Alias to self.store.join.""")
1444 1444
1445 1445 cachevfs = interfaceutil.Attribute(
1446 1446 """A VFS used to access the cache directory.
1447 1447
1448 1448 Typically .hg/cache.
1449 1449 """)
1450 1450
1451 1451 wcachevfs = interfaceutil.Attribute(
1452 1452 """A VFS used to access the cache directory dedicated to the working copy.
1453 1453
1454 1454 Typically .hg/wcache.
1455 1455 """)
1456 1456
1457 1457 filteredrevcache = interfaceutil.Attribute(
1458 1458 """Holds sets of revisions to be filtered.""")
1459 1459
1460 1460 names = interfaceutil.Attribute(
1461 1461 """A ``namespaces`` instance.""")
1462 1462
1463 1463 def close():
1464 1464 """Close the handle on this repository."""
1465 1465
1466 1466 def peer():
1467 1467 """Obtain an object conforming to the ``peer`` interface."""
1468 1468
1469 1469 def unfiltered():
1470 1470 """Obtain an unfiltered/raw view of this repo."""
1471 1471
1472 1472 def filtered(name, visibilityexceptions=None):
1473 1473 """Obtain a named view of this repository."""
1474 1474
1475 1475 obsstore = interfaceutil.Attribute(
1476 1476 """A store of obsolescence data.""")
1477 1477
1478 1478 changelog = interfaceutil.Attribute(
1479 1479 """A handle on the changelog revlog.""")
1480 1480
1481 1481 manifestlog = interfaceutil.Attribute(
1482 1482 """An instance conforming to the ``imanifestlog`` interface.
1483 1483
1484 1484 Provides access to manifests for the repository.
1485 1485 """)
1486 1486
1487 1487 dirstate = interfaceutil.Attribute(
1488 1488 """Working directory state.""")
1489 1489
1490 1490 narrowpats = interfaceutil.Attribute(
1491 1491 """Matcher patterns for this repository's narrowspec.""")
1492 1492
1493 1493 def narrowmatch(match=None, includeexact=False):
1494 1494 """Obtain a matcher for the narrowspec."""
1495 1495
1496 1496 def setnarrowpats(newincludes, newexcludes):
1497 1497 """Define the narrowspec for this repository."""
1498 1498
1499 1499 def __getitem__(changeid):
1500 1500 """Try to resolve a changectx."""
1501 1501
1502 1502 def __contains__(changeid):
1503 1503 """Whether a changeset exists."""
1504 1504
1505 1505 def __nonzero__():
1506 1506 """Always returns True."""
1507 1507 return True
1508 1508
1509 1509 __bool__ = __nonzero__
1510 1510
1511 1511 def __len__():
1512 1512 """Returns the number of changesets in the repo."""
1513 1513
1514 1514 def __iter__():
1515 1515 """Iterate over revisions in the changelog."""
1516 1516
1517 1517 def revs(expr, *args):
1518 1518 """Evaluate a revset.
1519 1519
1520 1520 Emits revisions.
1521 1521 """
1522 1522
1523 1523 def set(expr, *args):
1524 1524 """Evaluate a revset.
1525 1525
1526 1526 Emits changectx instances.
1527 1527 """
1528 1528
1529 1529 def anyrevs(specs, user=False, localalias=None):
1530 1530 """Find revisions matching one of the given revsets."""
1531 1531
1532 1532 def url():
1533 1533 """Returns a string representing the location of this repo."""
1534 1534
1535 1535 def hook(name, throw=False, **args):
1536 1536 """Call a hook."""
1537 1537
1538 1538 def tags():
1539 1539 """Return a mapping of tag to node."""
1540 1540
1541 1541 def tagtype(tagname):
1542 1542 """Return the type of a given tag."""
1543 1543
1544 1544 def tagslist():
1545 1545 """Return a list of tags ordered by revision."""
1546 1546
1547 1547 def nodetags(node):
1548 1548 """Return the tags associated with a node."""
1549 1549
1550 1550 def nodebookmarks(node):
1551 1551 """Return the list of bookmarks pointing to the specified node."""
1552 1552
1553 1553 def branchmap():
1554 1554 """Return a mapping of branch to heads in that branch."""
1555 1555
1556 1556 def revbranchcache():
1557 1557 pass
1558 1558
1559 1559 def branchtip(branch, ignoremissing=False):
1560 1560 """Return the tip node for a given branch."""
1561 1561
1562 1562 def lookup(key):
1563 1563 """Resolve the node for a revision."""
1564 1564
1565 1565 def lookupbranch(key):
1566 1566 """Look up the branch name of the given revision or branch name."""
1567 1567
1568 1568 def known(nodes):
1569 1569 """Determine whether a series of nodes is known.
1570 1570
1571 1571 Returns a list of bools.
1572 1572 """
1573 1573
1574 1574 def local():
1575 1575 """Whether the repository is local."""
1576 1576 return True
1577 1577
1578 1578 def publishing():
1579 1579 """Whether the repository is a publishing repository."""
1580 1580
1581 1581 def cancopy():
1582 1582 pass
1583 1583
1584 1584 def shared():
1585 1585 """The type of shared repository or None."""
1586 1586
1587 1587 def wjoin(f, *insidef):
1588 1588 """Calls self.vfs.reljoin(self.root, f, *insidef)"""
1589 1589
1590 1590 def setparents(p1, p2):
1591 1591 """Set the parent nodes of the working directory."""
1592 1592
1593 1593 def filectx(path, changeid=None, fileid=None):
1594 1594 """Obtain a filectx for the given file revision."""
1595 1595
1596 1596 def getcwd():
1597 1597 """Obtain the current working directory from the dirstate."""
1598 1598
1599 1599 def pathto(f, cwd=None):
1600 1600 """Obtain the relative path to a file."""
1601 1601
1602 1602 def adddatafilter(name, fltr):
1603 1603 pass
1604 1604
1605 1605 def wread(filename):
1606 1606 """Read a file from wvfs, using data filters."""
1607 1607
1608 1608 def wwrite(filename, data, flags, backgroundclose=False, **kwargs):
1609 1609 """Write data to a file in the wvfs, using data filters."""
1610 1610
1611 1611 def wwritedata(filename, data):
1612 1612 """Resolve data for writing to the wvfs, using data filters."""
1613 1613
1614 1614 def currenttransaction():
1615 1615 """Obtain the current transaction instance or None."""
1616 1616
1617 1617 def transaction(desc, report=None):
1618 1618 """Open a new transaction to write to the repository."""
1619 1619
1620 1620 def undofiles():
1621 1621 """Returns a list of (vfs, path) for files to undo transactions."""
1622 1622
1623 1623 def recover():
1624 1624 """Roll back an interrupted transaction."""
1625 1625
1626 1626 def rollback(dryrun=False, force=False):
1627 1627 """Undo the last transaction.
1628 1628
1629 1629 DANGEROUS.
1630 1630 """
1631 1631
1632 1632 def updatecaches(tr=None, full=False):
1633 1633 """Warm repo caches."""
1634 1634
1635 1635 def invalidatecaches():
1636 1636 """Invalidate cached data due to the repository mutating."""
1637 1637
1638 1638 def invalidatevolatilesets():
1639 1639 pass
1640 1640
1641 1641 def invalidatedirstate():
1642 1642 """Invalidate the dirstate."""
1643 1643
1644 1644 def invalidate(clearfilecache=False):
1645 1645 pass
1646 1646
1647 1647 def invalidateall():
1648 1648 pass
1649 1649
1650 1650 def lock(wait=True):
1651 1651 """Lock the repository store and return a lock instance."""
1652 1652
1653 1653 def wlock(wait=True):
1654 1654 """Lock the non-store parts of the repository."""
1655 1655
1656 1656 def currentwlock():
1657 1657 """Return the wlock if it's held or None."""
1658 1658
1659 1659 def checkcommitpatterns(wctx, vdirs, match, status, fail):
1660 1660 pass
1661 1661
1662 1662 def commit(text='', user=None, date=None, match=None, force=False,
1663 1663 editor=False, extra=None):
1664 1664 """Add a new revision to the repository."""
1665 1665
1666 1666 def commitctx(ctx, error=False, origctx=None):
1667 1667 """Commit a commitctx instance to the repository."""
1668 1668
1669 1669 def destroying():
1670 1670 """Inform the repository that nodes are about to be destroyed."""
1671 1671
1672 1672 def destroyed():
1673 1673 """Inform the repository that nodes have been destroyed."""
1674 1674
1675 1675 def status(node1='.', node2=None, match=None, ignored=False,
1676 1676 clean=False, unknown=False, listsubrepos=False):
1677 1677 """Convenience method to call repo[x].status()."""
1678 1678
1679 1679 def addpostdsstatus(ps):
1680 1680 pass
1681 1681
1682 1682 def postdsstatus():
1683 1683 pass
1684 1684
1685 1685 def clearpostdsstatus():
1686 1686 pass
1687 1687
1688 1688 def heads(start=None):
1689 1689 """Obtain list of nodes that are DAG heads."""
1690 1690
1691 1691 def branchheads(branch=None, start=None, closed=False):
1692 1692 pass
1693 1693
1694 1694 def branches(nodes):
1695 1695 pass
1696 1696
1697 1697 def between(pairs):
1698 1698 pass
1699 1699
1700 1700 def checkpush(pushop):
1701 1701 pass
1702 1702
1703 1703 prepushoutgoinghooks = interfaceutil.Attribute(
1704 1704 """util.hooks instance.""")
1705 1705
1706 1706 def pushkey(namespace, key, old, new):
1707 1707 pass
1708 1708
1709 1709 def listkeys(namespace):
1710 1710 pass
1711 1711
1712 1712 def debugwireargs(one, two, three=None, four=None, five=None):
1713 1713 pass
1714 1714
1715 1715 def savecommitmessage(text):
1716 1716 pass
1717 1717
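# Hedged illustration (not part of the original file): a rough sketch of how
# the locking, transaction and revset methods above fit together. ``repo`` is
# a hypothetical ``ilocalrepositorymain`` provider; ``release()``/``close()``
# on the returned lock and transaction objects are assumed from Mercurial's
# concrete implementations, not from this interface. Error handling omitted.
def _example_repo_usage(repo):
    # Evaluate a revset; ``revs()`` emits revision numbers, ``set()`` emits
    # changectx instances.
    headrevs = list(repo.revs(b'head()'))
    # Mutating operations take the wlock (working copy) and/or lock (store).
    wlock = repo.wlock()
    lock = repo.lock()
    try:
        tr = repo.transaction(b'example')
        try:
            # ... write to the store under the open transaction ...
            tr.close()
        finally:
            tr.release()
    finally:
        lock.release()
        wlock.release()
    return headrevs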
1718 1718 class completelocalrepository(ilocalrepositorymain,
1719 1719 ilocalrepositoryfilestorage):
1720 1720 """Complete interface for a local repository."""
1721 1721
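# Hedged illustration (not part of the original file): how an implementation
# might declare that it provides this interface. ``interfaceutil.implementer``
# is assumed to mirror ``zope.interface.implementer``; the class body is a
# placeholder, not a working repository.
@interfaceutil.implementer(completelocalrepository)
class _examplelocalrepository(object):
    """Placeholder showing the interface declaration only."""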
1722 1722 class iwireprotocolcommandcacher(interfaceutil.Interface):
1723 1723 """Represents a caching backend for wire protocol commands.
1724 1724
1725 1725 Wire protocol version 2 supports transparent caching of many commands.
1726 1726 To leverage this caching, servers can activate objects that cache
1727 1727 command responses. Objects handle both cache writing and reading.
1728 1728 This interface defines how that response caching mechanism works.
1729 1729
1730 1730 Wire protocol version 2 commands emit a series of objects that are
1731 1731 serialized and sent to the client. The caching layer exists between
1732 1732 the invocation of the command function and the sending of its output
1733 1733 objects to an output layer.
1734 1734
1735 1735 Instances of this interface represent a binding to a cache that
1736 1736 can serve a response (in place of calling a command function) and/or
1737 1737 write responses to a cache for subsequent use.
1738 1738
1739 1739 When a command request arrives, the following happens with regards
1740 1740 to this interface:
1741 1741
1742 1742 1. The server determines whether the command request is cacheable.
1743 1743 2. If it is, an instance of this interface is spawned.
1744 1744 3. The cacher is activated in a context manager (``__enter__`` is called).
1745 1745 4. A cache *key* for that request is derived. This will call the
1746 1746 instance's ``adjustcachekeystate()`` method so the derivation
1747 1747 can be influenced.
1748 1748 5. The cacher is informed of the derived cache key via a call to
1749 1749 ``setcachekey()``.
1750 1750 6. The cacher's ``lookup()`` method is called to test for presence of
1751 1751 the derived key in the cache.
1752 1752 7. If ``lookup()`` returns a hit, that cached result is used in place
1753 1753 of invoking the command function. ``__exit__`` is called and the instance
1754 1754 is discarded.
1755 1755 8. Otherwise, the command function is invoked.
1756 1756 9. ``onobject()`` is called for each object emitted by the command
1757 1757 function.
1758 1758 10. After the final object is seen, ``onfinished()`` is called.
1759 1759 11. ``__exit__`` is called to signal the end of use of the instance.
1760 1760
1761 1761 Cache *key* derivation can be influenced by the instance.
1762 1762
1763 1763 Cache keys are initially derived by a deterministic representation of
1764 1764 the command request. This includes the command name, arguments, protocol
1765 1765 version, etc. This initial key derivation is performed by CBOR-encoding a
1766 1766 data structure and feeding that output into a hasher.
1767 1767
1768 1768 Instances of this interface can influence this initial key derivation
1769 1769 via ``adjustcachekeystate()``.
1770 1770
1771 1771 The instance is informed of the derived cache key via a call to
1772 1772 ``setcachekey()``. The instance must store the key locally so it can
1773 1773 be consulted on subsequent operations that may require it.
1774 1774
1775 1775 When constructed, the instance has access to a callable that can be used
1776 1776 for encoding response objects. This callable receives as its single
1777 1777 argument an object emitted by a command function. It returns an iterable
1778 1778 of bytes chunks representing the encoded object. Unless the cacher is
1779 1779 caching native Python objects in memory or has a way of reconstructing
1780 1780 the original Python objects, implementations typically call this function
1781 1781 to produce bytes from the output objects and then store those bytes in
1782 1782 the cache. When it comes time to re-emit those bytes, they are wrapped
1783 1783 in a ``wireprototypes.encodedresponse`` instance to tell the output
1784 1784 layer that they are pre-encoded.
1785 1785
1786 1786 When receiving the objects emitted by the command function, instances
1787 1787 can choose what to do with those objects. The simplest thing to do is
1788 1788 re-emit the original objects. They will be forwarded to the output
1789 1789 layer and will be processed as if the cacher did not exist.
1790 1790
1791 1791 Implementations could also choose to not emit objects - instead locally
1792 1792 buffering objects or their encoded representation. They could then emit
1793 1793 a single "coalesced" object when ``onfinished()`` is called. In
1794 1794 this way, the implementation would function as a filtering layer of
1795 1795 sorts.
1796 1796
1797 1797 When caching objects, typically the encoded form of the object will
1798 1798 be stored. Keep in mind that if the original object is forwarded to
1799 1799 the output layer, it will need to be encoded there as well. For large
1800 1800 output, this redundant encoding could add overhead. Implementations
1801 1801 could wrap the encoded object data in ``wireprototypes.encodedresponse``
1802 1802 instances to avoid this overhead.
1803 1803 """
1804 1804 def __enter__():
1805 1805 """Marks the instance as active.
1806 1806
1807 1807 Should return self.
1808 1808 """
1809 1809
1810 1810 def __exit__(exctype, excvalue, exctb):
1811 1811 """Called when cacher is no longer used.
1812 1812
1813 1813 This can be used by implementations to perform cleanup actions (e.g.
1814 1814 disconnecting network sockets, aborting a partially cached response).
1815 1815 """
1816 1816
1817 1817 def adjustcachekeystate(state):
1818 1818 """Influences cache key derivation by adjusting state to derive key.
1819 1819
1820 1820 A dict defining the state used to derive the cache key is passed.
1821 1821
1822 1822 Implementations can modify this dict to record additional state that
1823 1823 is wanted to influence key derivation.
1824 1824
1825 1825 Implementations are *highly* encouraged to not modify or delete
1826 1826 existing keys.
1827 1827 """
1828 1828
1829 1829 def setcachekey(key):
1830 1830 """Record the derived cache key for this request.
1831 1831
1832 1832 Instances may mutate the key for internal usage, as desired. e.g.
1833 1833 instances may wish to prepend the repo name, introduce path
1834 1834 components for filesystem or URL addressing, etc. Behavior is up to
1835 1835 the cache.
1836 1836
1837 1837 Returns a bool indicating if the request is cacheable by this
1838 1838 instance.
1839 1839 """
1840 1840
1841 1841 def lookup():
1842 1842 """Attempt to resolve an entry in the cache.
1843 1843
1844 1844 The instance is instructed to look for the cache key that it was
1845 1845 informed about via the call to ``setcachekey()``.
1846 1846
1847 1847 If there's no cache hit or the cacher doesn't wish to use the cached
1848 1848 entry, ``None`` should be returned.
1849 1849
1850 1850 Else, a dict defining the cached result should be returned. The
1851 1851 dict may have the following keys:
1852 1852
1853 1853 objs
1854 1854 An iterable of objects that should be sent to the client. That
1855 1855 iterable of objects is expected to be what the command function
1856 1856 would return if invoked or an equivalent representation thereof.
1857 1857 """
1858 1858
1859 1859 def onobject(obj):
1860 1860 """Called when a new object is emitted from the command function.
1861 1861
1862 1862 Receives as its argument the object that was emitted from the
1863 1863 command function.
1864 1864
1865 1865 This method returns an iterator of objects to forward to the output
1866 1866 layer. The easiest implementation is a generator that just
1867 1867 ``yield obj``.
1868 1868 """
1869 1869
1870 1870 def onfinished():
1871 1871 """Called after all objects have been emitted from the command function.
1872 1872
1873 1873 Implementations should return an iterator of objects to forward to
1874 1874 the output layer.
1875 1875
1876 1876 This method can be a generator.
1877 1877 """
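# Hedged illustration (not part of the original file): a minimal no-op cacher
# following the call sequence described in the class docstring above. It
# never serves from cache (``lookup()`` returns None) and simply forwards
# objects; a real cacher would persist the encoded objects keyed by the
# value given to ``setcachekey()``.
class _examplenoopcacher(object):
    def __init__(self):
        self.key = None

    def __enter__(self):
        return self

    def __exit__(self, exctype, excvalue, exctb):
        pass

    def adjustcachekeystate(self, state):
        # A real implementation could add e.g. a cache format version here.
        pass

    def setcachekey(self, key):
        self.key = key
        return True

    def lookup(self):
        # None means "no usable cached entry"; the command function runs.
        return None

    def onobject(self, obj):
        # Forward each emitted object unchanged to the output layer.
        yield obj

    def onfinished(self):
        # Nothing was buffered, so nothing extra to emit at the end.
        return []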