streamclone.py
430 lines
| 15.3 KiB
| text/x-python
|
PythonLexer
/ mercurial / streamclone.py
Gregory Szorc
|
r26441 | # streamclone.py - producing and consuming streaming repository data | ||
# | ||||
# Copyright 2015 Gregory Szorc <gregory.szorc@gmail.com> | ||||
# | ||||
# This software may be used and distributed according to the terms of the | ||||
# GNU General Public License version 2 or any later version. | ||||
from __future__ import absolute_import | ||||
Gregory Szorc
|
r26755 | import struct | ||
Gregory Szorc
|
r26443 | |||
Gregory Szorc
|
r26442 | from .i18n import _ | ||
Gregory Szorc
|
r26441 | from . import ( | ||
branchmap, | ||||
Gregory Szorc
|
r26442 | error, | ||
Gregory Szorc
|
r32744 | phases, | ||
Gregory Szorc
|
r26443 | store, | ||
Gregory Szorc
|
r26442 | util, | ||
Gregory Szorc
|
r26441 | ) | ||
Gregory Szorc
|
r26467 | def canperformstreamclone(pullop, bailifbundle2supported=False): | ||
Gregory Szorc
|
r26446 | """Whether it is possible to perform a streaming clone as part of pull. | ||
Gregory Szorc
|
r26445 | |||
Gregory Szorc
|
r26467 | ``bailifbundle2supported`` will cause the function to return False if | ||
bundle2 stream clones are supported. It should only be called by the | ||||
legacy stream clone code path. | ||||
Gregory Szorc
|
r26446 | Returns a tuple of (supported, requirements). ``supported`` is True if | ||
streaming clone is supported and False otherwise. ``requirements`` is | ||||
a set of repo requirements from the remote, or ``None`` if stream clone | ||||
isn't supported. | ||||
""" | ||||
Gregory Szorc
|
r26466 | repo = pullop.repo | ||
remote = pullop.remote | ||||
Gregory Szorc
|
r26467 | bundle2supported = False | ||
if pullop.canusebundle2: | ||||
if 'v1' in pullop.remotebundle2caps.get('stream', []): | ||||
bundle2supported = True | ||||
# else | ||||
# Server doesn't support bundle2 stream clone or doesn't support | ||||
# the versions we support. Fall back and possibly allow legacy. | ||||
# Ensures legacy code path uses available bundle2. | ||||
if bailifbundle2supported and bundle2supported: | ||||
return False, None | ||||
# Ensures bundle2 doesn't try to do a stream clone if it isn't supported. | ||||
#elif not bailifbundle2supported and not bundle2supported: | ||||
# return False, None | ||||
Gregory Szorc
|
r26447 | # Streaming clone only works on empty repositories. | ||
if len(repo): | ||||
return False, None | ||||
Gregory Szorc
|
r26446 | # Streaming clone only works if all data is being requested. | ||
Gregory Szorc
|
r26466 | if pullop.heads: | ||
Gregory Szorc
|
r26446 | return False, None | ||
Gregory Szorc
|
r26445 | |||
Gregory Szorc
|
r26466 | streamrequested = pullop.streamclonerequested | ||
Gregory Szorc
|
r26446 | # If we don't have a preference, let the server decide for us. This | ||
# likely only comes into play in LANs. | ||||
if streamrequested is None: | ||||
# The server can advertise whether to prefer streaming clone. | ||||
streamrequested = remote.capable('stream-preferred') | ||||
if not streamrequested: | ||||
return False, None | ||||
Gregory Szorc
|
r26445 | |||
Gregory Szorc
|
r26446 | # In order for stream clone to work, the client has to support all the | ||
# requirements advertised by the server. | ||||
# | ||||
# The server advertises its requirements via the "stream" and "streamreqs" | ||||
# capability. "stream" (a value-less capability) is advertised if and only | ||||
# if the only requirement is "revlogv1." Else, the "streamreqs" capability | ||||
# is advertised and contains a comma-delimited list of requirements. | ||||
requirements = set() | ||||
if remote.capable('stream'): | ||||
requirements.add('revlogv1') | ||||
else: | ||||
streamreqs = remote.capable('streamreqs') | ||||
# This is weird and shouldn't happen with modern servers. | ||||
if not streamreqs: | ||||
Siddharth Agarwal
|
r32259 | pullop.repo.ui.warn(_( | ||
'warning: stream clone requested but server has them ' | ||||
'disabled\n')) | ||||
Gregory Szorc
|
r26446 | return False, None | ||
streamreqs = set(streamreqs.split(',')) | ||||
# Server requires something we don't support. Bail. | ||||
Siddharth Agarwal
|
r32259 | missingreqs = streamreqs - repo.supportedformats | ||
if missingreqs: | ||||
pullop.repo.ui.warn(_( | ||||
'warning: stream clone requested but client is missing ' | ||||
'requirements: %s\n') % ', '.join(sorted(missingreqs))) | ||||
pullop.repo.ui.warn( | ||||
_('(see https://www.mercurial-scm.org/wiki/MissingRequirement ' | ||||
'for more information)\n')) | ||||
Gregory Szorc
|
r26446 | return False, None | ||
requirements = streamreqs | ||||
return True, requirements | ||||
Gregory Szorc
|
r26462 | def maybeperformlegacystreamclone(pullop): | ||
"""Possibly perform a legacy stream clone operation. | ||||
Legacy stream clones are performed as part of pull but before all other | ||||
operations. | ||||
A legacy stream clone will not be performed if a bundle2 stream clone is | ||||
supported. | ||||
""" | ||||
Gregory Szorc
|
r26466 | supported, requirements = canperformstreamclone(pullop) | ||
Gregory Szorc
|
r26458 | |||
Gregory Szorc
|
r26446 | if not supported: | ||
return | ||||
Gregory Szorc
|
r26466 | repo = pullop.repo | ||
remote = pullop.remote | ||||
Gregory Szorc
|
r26459 | # Save remote branchmap. We will use it later to speed up branchcache | ||
# creation. | ||||
rbranchmap = None | ||||
if remote.capable('branchmap'): | ||||
rbranchmap = remote.branchmap() | ||||
Gregory Szorc
|
r26470 | repo.ui.status(_('streaming all changes\n')) | ||
Gregory Szorc
|
r26459 | fp = remote.stream_out() | ||
l = fp.readline() | ||||
try: | ||||
resp = int(l) | ||||
except ValueError: | ||||
raise error.ResponseError( | ||||
_('unexpected response from remote server:'), l) | ||||
if resp == 1: | ||||
Pierre-Yves David
|
r26587 | raise error.Abort(_('operation forbidden by server')) | ||
Gregory Szorc
|
r26459 | elif resp == 2: | ||
Pierre-Yves David
|
r26587 | raise error.Abort(_('locking the remote repository failed')) | ||
Gregory Szorc
|
r26459 | elif resp != 0: | ||
Pierre-Yves David
|
r26587 | raise error.Abort(_('the server sent an unknown error code')) | ||
Gregory Szorc
|
r26459 | |||
Gregory Szorc
|
r26468 | l = fp.readline() | ||
try: | ||||
filecount, bytecount = map(int, l.split(' ', 1)) | ||||
except (ValueError, TypeError): | ||||
raise error.ResponseError( | ||||
_('unexpected response from remote server:'), l) | ||||
Bryan O'Sullivan
|
r27850 | with repo.lock(): | ||
Gregory Szorc
|
r26468 | consumev1(repo, fp, filecount, bytecount) | ||
Gregory Szorc
|
r26461 | |||
# new requirements = old non-format requirements + | ||||
# new format-related remote requirements | ||||
# requirements from the streamed-in repository | ||||
repo.requirements = requirements | ( | ||||
repo.requirements - repo.supportedformats) | ||||
repo._applyopenerreqs() | ||||
repo._writerequirements() | ||||
if rbranchmap: | ||||
branchmap.replacecache(repo, rbranchmap) | ||||
repo.invalidate() | ||||
Gregory Szorc
|
r26445 | |||
Gregory Szorc
|
r32744 | def allowservergeneration(repo): | ||
Gregory Szorc
|
r26444 | """Whether streaming clones are allowed from the server.""" | ||
Jun Wu
|
r33499 | if not repo.ui.configbool('server', 'uncompressed', untrusted=True): | ||
Gregory Szorc
|
r32744 | return False | ||
# The way stream clone works makes it impossible to hide secret changesets. | ||||
# So don't allow this by default. | ||||
secret = phases.hassecret(repo) | ||||
if secret: | ||||
r33223 | return repo.ui.configbool('server', 'uncompressedallowsecret') | |||
Gregory Szorc
|
r32744 | |||
return True | ||||
Gregory Szorc
|
r26444 | |||
Gregory Szorc
|
r26443 | # This is it's own function so extensions can override it. | ||
def _walkstreamfiles(repo): | ||||
return repo.store.walk() | ||||
def generatev1(repo): | ||||
"""Emit content for version 1 of a streaming clone. | ||||
Gregory Szorc
|
r26469 | This returns a 3-tuple of (file count, byte size, data iterator). | ||
Gregory Szorc
|
r26443 | |||
Gregory Szorc
|
r26469 | The data iterator consists of N entries for each file being transferred. | ||
Each file entry starts as a line with the file name and integer size | ||||
delimited by a null byte. | ||||
Gregory Szorc
|
r26443 | |||
The raw file data follows. Following the raw file data is the next file | ||||
entry, or EOF. | ||||
When used on the wire protocol, an additional line indicating protocol | ||||
success will be prepended to the stream. This function is not responsible | ||||
for adding it. | ||||
This function will obtain a repository lock to ensure a consistent view of | ||||
the store is captured. It therefore may raise LockError. | ||||
""" | ||||
entries = [] | ||||
total_bytes = 0 | ||||
# Get consistent snapshot of repo, lock during scan. | ||||
Bryan O'Sullivan
|
r27845 | with repo.lock(): | ||
Gregory Szorc
|
r26443 | repo.ui.debug('scanning\n') | ||
for name, ename, size in _walkstreamfiles(repo): | ||||
if size: | ||||
entries.append((name, size)) | ||||
total_bytes += size | ||||
repo.ui.debug('%d files, %d bytes to transfer\n' % | ||||
(len(entries), total_bytes)) | ||||
svfs = repo.svfs | ||||
debugflag = repo.ui.debugflag | ||||
Gregory Szorc
|
r26469 | def emitrevlogdata(): | ||
r33256 | for name, size in entries: | |||
if debugflag: | ||||
repo.ui.debug('sending %s (%d bytes)\n' % (name, size)) | ||||
# partially encode name over the wire for backwards compat | ||||
yield '%s\0%d\n' % (store.encodedir(name), size) | ||||
Yuya Nishihara
|
r33411 | # auditing at this stage is both pointless (paths are already | ||
# trusted by the local repo) and expensive | ||||
Yuya Nishihara
|
r33410 | with svfs(name, 'rb', auditpath=False) as fp: | ||
if size <= 65536: | ||||
r33256 | yield fp.read(size) | |||
Yuya Nishihara
|
r33410 | else: | ||
for chunk in util.filechunkiter(fp, limit=size): | ||||
yield chunk | ||||
Gregory Szorc
|
r26469 | |||
return len(entries), total_bytes, emitrevlogdata() | ||||
def generatev1wireproto(repo): | ||||
"""Emit content for version 1 of streaming clone suitable for the wire. | ||||
Gregory Szorc
|
r35507 | This is the data output from ``generatev1()`` with 2 header lines. The | ||
first line indicates overall success. The 2nd contains the file count and | ||||
byte size of payload. | ||||
The success line contains "0" for success, "1" for stream generation not | ||||
allowed, and "2" for error locking the repository (possibly indicating | ||||
a permissions error for the server process). | ||||
Gregory Szorc
|
r26469 | """ | ||
Gregory Szorc
|
r35507 | if not allowservergeneration(repo): | ||
yield '1\n' | ||||
return | ||||
try: | ||||
filecount, bytecount, it = generatev1(repo) | ||||
except error.LockError: | ||||
yield '2\n' | ||||
return | ||||
# Indicates successful response. | ||||
yield '0\n' | ||||
Gregory Szorc
|
r26469 | yield '%d %d\n' % (filecount, bytecount) | ||
for chunk in it: | ||||
yield chunk | ||||
Gregory Szorc
|
r26443 | |||
Gregory Szorc
|
r26755 | def generatebundlev1(repo, compression='UN'): | ||
"""Emit content for version 1 of a stream clone bundle. | ||||
The first 4 bytes of the output ("HGS1") denote this as stream clone | ||||
bundle version 1. | ||||
The next 2 bytes indicate the compression type. Only "UN" is currently | ||||
supported. | ||||
The next 16 bytes are two 64-bit big endian unsigned integers indicating | ||||
file count and byte count, respectively. | ||||
The next 2 bytes is a 16-bit big endian unsigned short declaring the length | ||||
of the requirements string, including a trailing \0. The following N bytes | ||||
are the requirements string, which is ASCII containing a comma-delimited | ||||
list of repo requirements that are needed to support the data. | ||||
The remaining content is the output of ``generatev1()`` (which may be | ||||
compressed in the future). | ||||
Returns a tuple of (requirements, data generator). | ||||
""" | ||||
if compression != 'UN': | ||||
raise ValueError('we do not support the compression argument yet') | ||||
requirements = repo.requirements & repo.supportedformats | ||||
requires = ','.join(sorted(requirements)) | ||||
def gen(): | ||||
yield 'HGS1' | ||||
yield compression | ||||
filecount, bytecount, it = generatev1(repo) | ||||
repo.ui.status(_('writing %d bytes for %d files\n') % | ||||
(bytecount, filecount)) | ||||
yield struct.pack('>QQ', filecount, bytecount) | ||||
yield struct.pack('>H', len(requires) + 1) | ||||
yield requires + '\0' | ||||
# This is where we'll add compression in the future. | ||||
assert compression == 'UN' | ||||
seen = 0 | ||||
r28473 | repo.ui.progress(_('bundle'), 0, total=bytecount, unit=_('bytes')) | |||
Gregory Szorc
|
r26755 | |||
for chunk in it: | ||||
seen += len(chunk) | ||||
r28473 | repo.ui.progress(_('bundle'), seen, total=bytecount, | |||
unit=_('bytes')) | ||||
Gregory Szorc
|
r26755 | yield chunk | ||
repo.ui.progress(_('bundle'), None) | ||||
return requirements, gen() | ||||
Gregory Szorc
|
r26468 | def consumev1(repo, fp, filecount, bytecount): | ||
Gregory Szorc
|
r26443 | """Apply the contents from version 1 of a streaming clone file handle. | ||
Mads Kiilerich
|
r30332 | This takes the output from "stream_out" and applies it to the specified | ||
Gregory Szorc
|
r26443 | repository. | ||
Mads Kiilerich
|
r30332 | Like "stream_out," the status line added by the wire protocol is not | ||
handled by this function. | ||||
Gregory Szorc
|
r26443 | """ | ||
Bryan O'Sullivan
|
r27859 | with repo.lock(): | ||
Gregory Szorc
|
r26443 | repo.ui.status(_('%d files to transfer, %s of data\n') % | ||
Gregory Szorc
|
r26468 | (filecount, util.bytecount(bytecount))) | ||
Gregory Szorc
|
r26443 | handled_bytes = 0 | ||
r28473 | repo.ui.progress(_('clone'), 0, total=bytecount, unit=_('bytes')) | |||
Simon Farnsworth
|
r30975 | start = util.timer() | ||
Gregory Szorc
|
r26443 | |||
FUJIWARA Katsunori
|
r29919 | # TODO: get rid of (potential) inconsistency | ||
# | ||||
# If transaction is started and any @filecache property is | ||||
# changed at this point, it causes inconsistency between | ||||
# in-memory cached property and streamclone-ed file on the | ||||
# disk. Nested transaction prevents transaction scope "clone" | ||||
# below from writing in-memory changes out at the end of it, | ||||
# even though in-memory changes are discarded at the end of it | ||||
# regardless of transaction nesting. | ||||
# | ||||
# But transaction nesting can't be simply prohibited, because | ||||
# nesting occurs also in ordinary case (e.g. enabling | ||||
# clonebundles). | ||||
Bryan O'Sullivan
|
r27870 | with repo.transaction('clone'): | ||
Gregory Szorc
|
r27897 | with repo.svfs.backgroundclosing(repo.ui, expectedcount=filecount): | ||
Gregory Szorc
|
r27896 | for i in xrange(filecount): | ||
# XXX doesn't support '\n' or '\r' in filenames | ||||
l = fp.readline() | ||||
try: | ||||
name, size = l.split('\0', 1) | ||||
size = int(size) | ||||
except (ValueError, TypeError): | ||||
raise error.ResponseError( | ||||
_('unexpected response from remote server:'), l) | ||||
if repo.ui.debugflag: | ||||
repo.ui.debug('adding %s (%s)\n' % | ||||
(name, util.bytecount(size))) | ||||
# for backwards compat, name was partially encoded | ||||
Gregory Szorc
|
r27897 | path = store.decodedir(name) | ||
with repo.svfs(path, 'w', backgroundclose=True) as ofp: | ||||
Gregory Szorc
|
r27896 | for chunk in util.filechunkiter(fp, limit=size): | ||
handled_bytes += len(chunk) | ||||
repo.ui.progress(_('clone'), handled_bytes, | ||||
r28473 | total=bytecount, unit=_('bytes')) | |||
Gregory Szorc
|
r27896 | ofp.write(chunk) | ||
Gregory Szorc
|
r26443 | |||
FUJIWARA Katsunori
|
r29919 | # force @filecache properties to be reloaded from | ||
# streamclone-ed file at next access | ||||
repo.invalidate(clearfilecache=True) | ||||
Gregory Szorc
|
r26443 | |||
Simon Farnsworth
|
r30975 | elapsed = util.timer() - start | ||
Gregory Szorc
|
r26443 | if elapsed <= 0: | ||
elapsed = 0.001 | ||||
repo.ui.progress(_('clone'), None) | ||||
repo.ui.status(_('transferred %s in %.1f seconds (%s/sec)\n') % | ||||
Gregory Szorc
|
r26468 | (util.bytecount(bytecount), elapsed, | ||
util.bytecount(bytecount / elapsed))) | ||||
Gregory Szorc
|
r26755 | |||
Gregory Szorc
|
r27882 | def readbundle1header(fp): | ||
Gregory Szorc
|
r26755 | compression = fp.read(2) | ||
if compression != 'UN': | ||||
raise error.Abort(_('only uncompressed stream clone bundles are ' | ||||
'supported; got %s') % compression) | ||||
filecount, bytecount = struct.unpack('>QQ', fp.read(16)) | ||||
requireslen = struct.unpack('>H', fp.read(2))[0] | ||||
requires = fp.read(requireslen) | ||||
if not requires.endswith('\0'): | ||||
raise error.Abort(_('malformed stream clone bundle: ' | ||||
'requirements not properly encoded')) | ||||
requirements = set(requires.rstrip('\0').split(',')) | ||||
Gregory Szorc
|
r27882 | |||
return filecount, bytecount, requirements | ||||
def applybundlev1(repo, fp): | ||||
"""Apply the content from a stream clone bundle version 1. | ||||
We assume the 4 byte header has been read and validated and the file handle | ||||
is at the 2 byte compression identifier. | ||||
""" | ||||
if len(repo): | ||||
raise error.Abort(_('cannot apply stream clone bundle on non-empty ' | ||||
'repo')) | ||||
filecount, bytecount, requirements = readbundle1header(fp) | ||||
Gregory Szorc
|
r26755 | missingreqs = requirements - repo.supportedformats | ||
if missingreqs: | ||||
raise error.Abort(_('unable to apply stream clone: ' | ||||
'unsupported format: %s') % | ||||
', '.join(sorted(missingreqs))) | ||||
consumev1(repo, fp, filecount, bytecount) | ||||
class streamcloneapplier(object): | ||||
"""Class to manage applying streaming clone bundles. | ||||
We need to wrap ``applybundlev1()`` in a dedicated type to enable bundle | ||||
readers to perform bundle type-specific functionality. | ||||
""" | ||||
def __init__(self, fh): | ||||
self._fh = fh | ||||
def apply(self, repo): | ||||
return applybundlev1(repo, self._fh) | ||||