changegroup: fix deltachunk API to be consistent from one class to another...
marmoute
r48131:3f00665b default
@@ -1,319 +1,339 @@
1 1 # shallowbundle.py - bundle10 implementation for use with shallow repositories
2 2 #
3 3 # Copyright 2013 Facebook, Inc.
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7 from __future__ import absolute_import
8 8
9 9 from mercurial.i18n import _
10 10 from mercurial.node import bin, hex
11 11 from mercurial import (
12 12 bundlerepo,
13 13 changegroup,
14 14 error,
15 15 match,
16 16 mdiff,
17 17 pycompat,
18 18 )
19 19 from . import (
20 20 constants,
21 21 remotefilelog,
22 22 shallowutil,
23 23 )
24 24
25 25 NoFiles = 0
26 26 LocalFiles = 1
27 27 AllFiles = 2
28 28
29 29
30 30 def shallowgroup(cls, self, nodelist, rlog, lookup, units=None, reorder=None):
31 31 if not isinstance(rlog, remotefilelog.remotefilelog):
32 32 for c in super(cls, self).group(nodelist, rlog, lookup, units=units):
33 33 yield c
34 34 return
35 35
36 36 if len(nodelist) == 0:
37 37 yield self.close()
38 38 return
39 39
40 40 nodelist = shallowutil.sortnodes(nodelist, rlog.parents)
41 41
42 42 # add the parent of the first rev
43 43 p = rlog.parents(nodelist[0])[0]
44 44 nodelist.insert(0, p)
45 45
46 46 # build deltas
47 47 for i in pycompat.xrange(len(nodelist) - 1):
48 48 prev, curr = nodelist[i], nodelist[i + 1]
49 49 linknode = lookup(curr)
50 50 for c in self.nodechunk(rlog, curr, prev, linknode):
51 51 yield c
52 52
53 53 yield self.close()
54 54
55 55
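The delta loop above assumes `nodelist` is ordered parent-before-child; `shallowutil.sortnodes` provides that ordering. As a rough illustration only (a hypothetical standalone helper, not the actual implementation), such a parent-first sort can be written as:

# Hypothetical sketch of a parent-first ordering like the one
# shallowutil.sortnodes provides; `parents` is a callable such as
# rlog.parents returning the parent nodes of a node.
def toposort(nodes, parents):
    nodes = set(nodes)
    result, seen = [], set()

    def visit(n):
        if n in seen or n not in nodes:
            return
        seen.add(n)
        for p in parents(n):  # emit parents before their children
            visit(p)
        result.append(n)

    for n in sorted(nodes):  # sorted only for deterministic output
        visit(n)
    return result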
56 56 class shallowcg1packer(changegroup.cgpacker):
57 57 def generate(self, commonrevs, clnodes, fastpathlinkrev, source, **kwargs):
58 58 if shallowutil.isenabled(self._repo):
59 59 fastpathlinkrev = False
60 60
61 61 return super(shallowcg1packer, self).generate(
62 62 commonrevs, clnodes, fastpathlinkrev, source, **kwargs
63 63 )
64 64
65 65 def group(self, nodelist, rlog, lookup, units=None, reorder=None):
66 66 return shallowgroup(
67 67 shallowcg1packer, self, nodelist, rlog, lookup, units=units
68 68 )
69 69
70 70 def generatefiles(self, changedfiles, *args, **kwargs):
71 71 try:
72 72 linknodes, commonrevs, source = args
73 73 except ValueError:
74 74 commonrevs, source, mfdicts, fastpathlinkrev, fnodes, clrevs = args
75 75 if shallowutil.isenabled(self._repo):
76 76 repo = self._repo
77 77 if isinstance(repo, bundlerepo.bundlerepository):
78 78 # If the bundle contains filelogs, we can't pull from it, since
79 79 # bundlerepo is heavily tied to revlogs. Require that
80 80 # the user run `hg unbundle` instead.
81 81 # Force load the filelog data.
82 82 bundlerepo.bundlerepository.file(repo, b'foo')
83 83 if repo._cgfilespos:
84 84 raise error.Abort(
85 85 b"cannot pull from full bundles",
86 86 hint=b"use `hg unbundle` instead",
87 87 )
88 88 return []
89 89 filestosend = self.shouldaddfilegroups(source)
90 90 if filestosend == NoFiles:
91 91 changedfiles = list(
92 92 [f for f in changedfiles if not repo.shallowmatch(f)]
93 93 )
94 94
95 95 return super(shallowcg1packer, self).generatefiles(
96 96 changedfiles, *args, **kwargs
97 97 )
98 98
99 99 def shouldaddfilegroups(self, source):
100 100 repo = self._repo
101 101 if not shallowutil.isenabled(repo):
102 102 return AllFiles
103 103
104 104 if source == b"push" or source == b"bundle":
105 105 return AllFiles
106 106
107 107 # We won't actually strip the files, but we should put them in any
108 108 # backup bundle generated by strip (especially for cases like narrow's
109 109 # `hg tracked --removeinclude`, as failing to do so means that the
110 110 # "saved" changesets during a strip won't have their files reapplied and
111 111 # thus their linknode adjusted, if necessary).
112 112 if source == b"strip":
113 113 cfg = repo.ui.config(b'remotefilelog', b'strip.includefiles')
114 114 if cfg == b'local':
115 115 return LocalFiles
116 116 elif cfg != b'none':
117 117 return AllFiles
118 118
119 119 caps = self._bundlecaps or []
120 120 if source == b"serve" or source == b"pull":
121 121 if constants.BUNDLE2_CAPABLITY in caps:
122 122 return LocalFiles
123 123 else:
124 124 # Serving to a full repo requires us to serve everything
125 125 repo.ui.warn(_(b"pulling from a shallow repo\n"))
126 126 return AllFiles
127 127
128 128 return NoFiles
129 129
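The `strip` branch above is driven by a config knob; for example, to keep only locally-authored file revisions in strip backup bundles one would set (illustrative hgrc snippet; per the code above, the recognized values are `local`, `none`, and anything else for all files):

[remotefilelog]
strip.includefiles = local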
130 130 def prune(self, rlog, missing, commonrevs):
131 131 if not isinstance(rlog, remotefilelog.remotefilelog):
132 132 return super(shallowcg1packer, self).prune(
133 133 rlog, missing, commonrevs
134 134 )
135 135
136 136 repo = self._repo
137 137 results = []
138 138 for fnode in missing:
139 139 fctx = repo.filectx(rlog.filename, fileid=fnode)
140 140 if fctx.linkrev() not in commonrevs:
141 141 results.append(fnode)
142 142 return results
143 143
144 144 def nodechunk(self, revlog, node, prevnode, linknode):
145 145 prefix = b''
146 146 if prevnode == revlog.nullid:
147 147 delta = revlog.rawdata(node)
148 148 prefix = mdiff.trivialdiffheader(len(delta))
149 149 else:
150 150 # Actually uses remotefilelog.revdiff which works on nodes, not revs
151 151 delta = revlog.revdiff(prevnode, node)
152 152 p1, p2 = revlog.parents(node)
153 153 flags = revlog.flags(node)
154 154 meta = self.builddeltaheader(node, p1, p2, prevnode, linknode, flags)
155 155 meta += prefix
156 156 l = len(meta) + len(delta)
157 157 yield changegroup.chunkheader(l)
158 158 yield meta
159 159 yield delta
160 160
161 161
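When there is no delta base, `nodechunk` ships the full text disguised as a delta, prefixed by `mdiff.trivialdiffheader`. A minimal sketch of the idea, assuming that header is a single bdiff hunk replacing the empty range [0, 0) of the base:

import struct

# Sketch: a "trivial" diff header turns a full text into a delta that
# replaces bytes [0, 0) of the (empty) base with the whole text, so
# mdiff.patch(b'', trivialdiff(fulltext)) reproduces fulltext.
def trivialdiff(fulltext):
    header = struct.pack(b">lll", 0, 0, len(fulltext))
    return header + fulltext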
162 162 def makechangegroup(orig, repo, outgoing, version, source, *args, **kwargs):
163 163 if not shallowutil.isenabled(repo):
164 164 return orig(repo, outgoing, version, source, *args, **kwargs)
165 165
166 166 original = repo.shallowmatch
167 167 try:
168 168 # if serving, only send files the client has patterns for
169 169 if source == b'serve':
170 170 bundlecaps = kwargs.get('bundlecaps')
171 171 includepattern = None
172 172 excludepattern = None
173 173 for cap in bundlecaps or []:
174 174 if cap.startswith(b"includepattern="):
175 175 raw = cap[len(b"includepattern=") :]
176 176 if raw:
177 177 includepattern = raw.split(b'\0')
178 178 elif cap.startswith(b"excludepattern="):
179 179 raw = cap[len(b"excludepattern=") :]
180 180 if raw:
181 181 excludepattern = raw.split(b'\0')
182 182 if includepattern or excludepattern:
183 183 repo.shallowmatch = match.match(
184 184 repo.root, b'', None, includepattern, excludepattern
185 185 )
186 186 else:
187 187 repo.shallowmatch = match.always()
188 188 return orig(repo, outgoing, version, source, *args, **kwargs)
189 189 finally:
190 190 repo.shallowmatch = original
191 191
192 192
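The capability strings parsed above are plain `name=value` tokens whose value is a NUL-separated pattern list; a client advertising include patterns would build them along these lines (illustrative only):

# Illustrative construction of the caps parsed by makechangegroup:
# b"includepattern=" / b"excludepattern=" followed by NUL-joined patterns.
patterns = [b'path:foo', b'path:bar/baz']
bundlecaps = {b'includepattern=' + b'\0'.join(patterns)}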
193 193 def addchangegroupfiles(
194 194 orig, repo, source, revmap, trp, expectedfiles, *args, **kwargs
195 195 ):
196 196 if not shallowutil.isenabled(repo):
197 197 return orig(repo, source, revmap, trp, expectedfiles, *args, **kwargs)
198 198
199 199 newfiles = 0
200 200 visited = set()
201 201 revisiondatas = {}
202 202 queue = []
203 203
204 204 # Normal Mercurial processes each file one at a time, adding all
205 205 # the new revisions for that file at once. In remotefilelog a file
206 206 # revision may depend on a different file's revision (in the case
207 207 # of a rename/copy), so we must lay all revisions down across all
208 208 # files in topological order.
209 209
210 210 # read all the file chunks but don't add them
211 211 progress = repo.ui.makeprogress(_(b'files'), total=expectedfiles)
212 212 while True:
213 213 chunkdata = source.filelogheader()
214 214 if not chunkdata:
215 215 break
216 216 f = chunkdata[b"filename"]
217 217 repo.ui.debug(b"adding %s revisions\n" % f)
218 218 progress.increment()
219 219
220 220 if not repo.shallowmatch(f):
221 221 fl = repo.file(f)
222 222 deltas = source.deltaiter()
223 223 fl.addgroup(deltas, revmap, trp)
224 224 continue
225 225
226 226 chain = None
227 227 while True:
228 # returns: (node, p1, p2, cs, deltabase, delta, flags) or None
228 # returns: None or (
229 # node,
230 # p1,
231 # p2,
232 # cs,
233 # deltabase,
234 # delta,
235 # flags,
236 # sidedata,
237 # proto_flags
238 # )
229 239 revisiondata = source.deltachunk(chain)
230 240 if not revisiondata:
231 241 break
232 242
233 243 chain = revisiondata[0]
234 244
235 245 revisiondatas[(f, chain)] = revisiondata
236 246 queue.append((f, chain))
237 247
238 248 if f not in visited:
239 249 newfiles += 1
240 250 visited.add(f)
241 251
242 252 if chain is None:
243 253 raise error.Abort(_(b"received file revlog group is empty"))
244 254
245 255 processed = set()
246 256
247 257 def available(f, node, depf, depnode):
248 258 if depnode != repo.nullid and (depf, depnode) not in processed:
249 259 if not (depf, depnode) in revisiondatas:
250 260 # It's not in the changegroup, assume it's already
251 261 # in the repo
252 262 return True
253 263 # re-add self to queue
254 264 queue.insert(0, (f, node))
255 265 # add dependency in front
256 266 queue.insert(0, (depf, depnode))
257 267 return False
258 268 return True
259 269
260 270 skipcount = 0
261 271
262 272 # Prefetch the non-bundled revisions that we will need
263 273 prefetchfiles = []
264 274 for f, node in queue:
265 275 revisiondata = revisiondatas[(f, node)]
266 # revisiondata: (node, p1, p2, cs, deltabase, delta, flags)
276 # revisiondata: (node, p1, p2, cs, deltabase, delta, flags, sdata, pfl)
267 277 dependents = [revisiondata[1], revisiondata[2], revisiondata[4]]
268 278
269 279 for dependent in dependents:
270 280 if dependent == repo.nullid or (f, dependent) in revisiondatas:
271 281 continue
272 282 prefetchfiles.append((f, hex(dependent)))
273 283
274 284 repo.fileservice.prefetch(prefetchfiles)
275 285
276 286 # Apply the revisions in topological order such that a revision
277 287 # is only written once its deltabase and parents have been written.
278 288 while queue:
279 289 f, node = queue.pop(0)
280 290 if (f, node) in processed:
281 291 continue
282 292
283 293 skipcount += 1
284 294 if skipcount > len(queue) + 1:
285 295 raise error.Abort(_(b"circular node dependency"))
286 296
287 297 fl = repo.file(f)
288 298
289 299 revisiondata = revisiondatas[(f, node)]
290 # revisiondata: (node, p1, p2, cs, deltabase, delta, flags)
291 node, p1, p2, linknode, deltabase, delta, flags, sidedata = revisiondata
300 # revisiondata: (node, p1, p2, cs, deltabase, delta, flags, sdata, pfl)
301 (
302 node,
303 p1,
304 p2,
305 linknode,
306 deltabase,
307 delta,
308 flags,
309 sidedata,
310 proto_flags,
311 ) = revisiondata
292 312
293 313 if not available(f, node, f, deltabase):
294 314 continue
295 315
296 316 base = fl.rawdata(deltabase)
297 317 text = mdiff.patch(base, delta)
298 318 if not isinstance(text, bytes):
299 319 text = bytes(text)
300 320
301 321 meta, text = shallowutil.parsemeta(text)
302 322 if b'copy' in meta:
303 323 copyfrom = meta[b'copy']
304 324 copynode = bin(meta[b'copyrev'])
305 325 if not available(f, node, copyfrom, copynode):
306 326 continue
307 327
308 328 for p in [p1, p2]:
309 329 if p != repo.nullid:
310 330 if not available(f, node, f, p):
311 331 continue
312 332
313 333 fl.add(text, meta, trp, linknode, p1, p2)
314 334 processed.add((f, node))
315 335 skipcount = 0
316 336
317 337 progress.complete()
318 338
319 339 return len(revisiondatas), newfiles
@@ -1,1958 +1,1980 @@
1 1 # changegroup.py - Mercurial changegroup manipulation functions
2 2 #
3 3 # Copyright 2006 Olivia Mackall <olivia@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import os
11 11 import struct
12 12 import weakref
13 13
14 14 from .i18n import _
15 15 from .node import (
16 16 hex,
17 17 nullrev,
18 18 short,
19 19 )
20 20 from .pycompat import open
21 21
22 22 from . import (
23 23 error,
24 24 match as matchmod,
25 25 mdiff,
26 26 phases,
27 27 pycompat,
28 28 requirements,
29 29 scmutil,
30 30 util,
31 31 )
32 32
33 33 from .interfaces import repository
34 34 from .revlogutils import sidedata as sidedatamod
35 35 from .revlogutils import constants as revlog_constants
36 36 from .utils import storageutil
37 37
38 38 _CHANGEGROUPV1_DELTA_HEADER = struct.Struct(b"20s20s20s20s")
39 39 _CHANGEGROUPV2_DELTA_HEADER = struct.Struct(b"20s20s20s20s20s")
40 40 _CHANGEGROUPV3_DELTA_HEADER = struct.Struct(b">20s20s20s20s20sH")
41 41 _CHANGEGROUPV4_DELTA_HEADER = struct.Struct(b">B20s20s20s20s20sH")
42 42
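Each stream version has a fixed-size binary delta header described by the Structs above; in version 4, for instance, the leading byte carries the protocol flags and the trailing unsigned short the revlog flags. A sketch of decoding one, with the field order matching `cg4unpacker._deltaheader` below:

# Sketch: decoding a cg4 delta header; `stream` is assumed to be a
# file-like object positioned at the start of a delta header.
size = _CHANGEGROUPV4_DELTA_HEADER.size
(protocol_flags, node, p1, p2, deltabase, cs, flags) = (
    _CHANGEGROUPV4_DELTA_HEADER.unpack(readexactly(stream, size))
)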
43 43 LFS_REQUIREMENT = b'lfs'
44 44
45 45 readexactly = util.readexactly
46 46
47 47
48 48 def getchunk(stream):
49 49 """return the next chunk from stream as a string"""
50 50 d = readexactly(stream, 4)
51 51 l = struct.unpack(b">l", d)[0]
52 52 if l <= 4:
53 53 if l:
54 54 raise error.Abort(_(b"invalid chunk length %d") % l)
55 55 return b""
56 56 return readexactly(stream, l - 4)
57 57
58 58
59 59 def chunkheader(length):
60 60 """return a changegroup chunk header (string)"""
61 61 return struct.pack(b">l", length + 4)
62 62
63 63
64 64 def closechunk():
65 65 """return a changegroup chunk header (string) for a zero-length chunk"""
66 66 return struct.pack(b">l", 0)
67 67
68 68
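`chunkheader` and `getchunk` are inverses: the 4-byte big-endian length prefix counts itself, and a zero length (from `closechunk`) marks the end of a group. A tiny round trip using the functions above:

import io

# Round trip of the chunk framing: one payload chunk, then a close marker.
payload = b'hello'
stream = io.BytesIO(chunkheader(len(payload)) + payload + closechunk())
assert getchunk(stream) == payload  # length prefix stripped
assert getchunk(stream) == b''      # zero-length close marker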
69 69 def _fileheader(path):
70 70 """Obtain a changegroup chunk header for a named path."""
71 71 return chunkheader(len(path)) + path
72 72
73 73
74 74 def writechunks(ui, chunks, filename, vfs=None):
75 75 """Write chunks to a file and return its filename.
76 76
77 77 The stream is assumed to be a bundle file.
78 78 Existing files will not be overwritten.
79 79 If no filename is specified, a temporary file is created.
80 80 """
81 81 fh = None
82 82 cleanup = None
83 83 try:
84 84 if filename:
85 85 if vfs:
86 86 fh = vfs.open(filename, b"wb")
87 87 else:
88 88 # Increase default buffer size because default is usually
89 89 # small (4k is common on Linux).
90 90 fh = open(filename, b"wb", 131072)
91 91 else:
92 92 fd, filename = pycompat.mkstemp(prefix=b"hg-bundle-", suffix=b".hg")
93 93 fh = os.fdopen(fd, "wb")
94 94 cleanup = filename
95 95 for c in chunks:
96 96 fh.write(c)
97 97 cleanup = None
98 98 return filename
99 99 finally:
100 100 if fh is not None:
101 101 fh.close()
102 102 if cleanup is not None:
103 103 if filename and vfs:
104 104 vfs.unlink(cleanup)
105 105 else:
106 106 os.unlink(cleanup)
107 107
108 108
109 109 class cg1unpacker(object):
110 110 """Unpacker for cg1 changegroup streams.
111 111
112 112 A changegroup unpacker handles the framing of the revision data in
113 113 the wire format. Most consumers will want to use the apply()
114 114 method to add the changes from the changegroup to a repository.
115 115
116 116 If you're forwarding a changegroup unmodified to another consumer,
117 117 use getchunks(), which returns an iterator of changegroup
118 118 chunks. This is mostly useful for cases where you need to know the
119 119 data stream has ended by observing the end of the changegroup.
120 120
121 121 deltachunk() is useful only if you're applying delta data. Most
122 122 consumers should prefer apply() instead.
123 123
124 124 A few other public methods exist. Those are used only for
125 125 bundlerepo and some debug commands - their use is discouraged.
126 126 """
127 127
128 128 deltaheader = _CHANGEGROUPV1_DELTA_HEADER
129 129 deltaheadersize = deltaheader.size
130 130 version = b'01'
131 131 _grouplistcount = 1 # One list of files after the manifests
132 132
133 133 def __init__(self, fh, alg, extras=None):
134 134 if alg is None:
135 135 alg = b'UN'
136 136 if alg not in util.compengines.supportedbundletypes:
137 137 raise error.Abort(_(b'unknown stream compression type: %s') % alg)
138 138 if alg == b'BZ':
139 139 alg = b'_truncatedBZ'
140 140
141 141 compengine = util.compengines.forbundletype(alg)
142 142 self._stream = compengine.decompressorreader(fh)
143 143 self._type = alg
144 144 self.extras = extras or {}
145 145 self.callback = None
146 146
147 147 # These methods (compressed, read, seek, tell) all appear to only
148 148 # be used by bundlerepo, but it's a little hard to tell.
149 149 def compressed(self):
150 150 return self._type is not None and self._type != b'UN'
151 151
152 152 def read(self, l):
153 153 return self._stream.read(l)
154 154
155 155 def seek(self, pos):
156 156 return self._stream.seek(pos)
157 157
158 158 def tell(self):
159 159 return self._stream.tell()
160 160
161 161 def close(self):
162 162 return self._stream.close()
163 163
164 164 def _chunklength(self):
165 165 d = readexactly(self._stream, 4)
166 166 l = struct.unpack(b">l", d)[0]
167 167 if l <= 4:
168 168 if l:
169 169 raise error.Abort(_(b"invalid chunk length %d") % l)
170 170 return 0
171 171 if self.callback:
172 172 self.callback()
173 173 return l - 4
174 174
175 175 def changelogheader(self):
176 176 """v10 does not have a changelog header chunk"""
177 177 return {}
178 178
179 179 def manifestheader(self):
180 180 """v10 does not have a manifest header chunk"""
181 181 return {}
182 182
183 183 def filelogheader(self):
184 184 """return the header of the filelogs chunk, v10 only has the filename"""
185 185 l = self._chunklength()
186 186 if not l:
187 187 return {}
188 188 fname = readexactly(self._stream, l)
189 189 return {b'filename': fname}
190 190
191 191 def _deltaheader(self, headertuple, prevnode):
192 192 node, p1, p2, cs = headertuple
193 193 if prevnode is None:
194 194 deltabase = p1
195 195 else:
196 196 deltabase = prevnode
197 197 flags = 0
198 198 protocol_flags = 0
199 199 return node, p1, p2, deltabase, cs, flags, protocol_flags
200 200
201 201 def deltachunk(self, prevnode):
202 # Chunkdata: (node, p1, p2, cs, deltabase, delta, flags, sidedata, proto_flags)
202 203 l = self._chunklength()
203 204 if not l:
204 205 return {}
205 206 headerdata = readexactly(self._stream, self.deltaheadersize)
206 207 header = self.deltaheader.unpack(headerdata)
207 208 delta = readexactly(self._stream, l - self.deltaheadersize)
208 209 header = self._deltaheader(header, prevnode)
209 210 node, p1, p2, deltabase, cs, flags, protocol_flags = header
210 return node, p1, p2, cs, deltabase, delta, flags, protocol_flags
211 return node, p1, p2, cs, deltabase, delta, flags, {}, protocol_flags
211 212
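This is the heart of the fix: every unpacker's `deltachunk()` now returns the same 9-field tuple, with cg1/cg2/cg3 supplying an empty sidedata dict. A minimal consumer, chaining each call on the previous node the way `deltaiter()` does below:

# Minimal consumer of the now-uniform deltachunk() contract; an empty
# result marks the end of the current delta group. `unpacker` stands
# for any cg*unpacker instance.
chain = None
while True:
    chunkdata = unpacker.deltachunk(chain)
    if not chunkdata:
        break
    (node, p1, p2, cs, deltabase, delta,
     flags, sidedata, proto_flags) = chunkdata
    chain = node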
212 213 def getchunks(self):
213 214 """returns all the chunks contains in the bundle
214 215
215 216 Used when you need to forward the binary stream to a file or another
216 217 network API. To do so, it parses the changegroup data; otherwise it would
217 218 block in the sshrepo case because it doesn't know the end of the stream.
218 219 """
219 220 # For changegroup 1 and 2, we expect 3 parts: changelog, manifestlog,
220 221 # and a list of filelogs. For changegroup 3, we expect 4 parts:
221 222 # changelog, manifestlog, a list of tree manifestlogs, and a list of
222 223 # filelogs.
223 224 #
224 225 # Changelog and manifestlog parts are terminated with empty chunks. The
225 226 # tree and file parts are a list of entry sections. Each entry section
226 227 # is a series of chunks terminating in an empty chunk. The list of these
227 228 # entry sections is terminated in yet another empty chunk, so we know
228 229 # we've reached the end of the tree/file list when we reach an empty
229 230 # chunk that was preceded by no non-empty chunks.
230 231
231 232 parts = 0
232 233 while parts < 2 + self._grouplistcount:
233 234 noentries = True
234 235 while True:
235 236 chunk = getchunk(self)
236 237 if not chunk:
237 238 # The first two empty chunks represent the end of the
238 239 # changelog and the manifestlog portions. The remaining
239 240 # empty chunks represent either A) the end of individual
240 241 # tree or file entries in the file list, or B) the end of
241 242 # the entire list. It's the end of the entire list if there
242 243 # were no entries (i.e. noentries is True).
243 244 if parts < 2:
244 245 parts += 1
245 246 elif noentries:
246 247 parts += 1
247 248 break
248 249 noentries = False
249 250 yield chunkheader(len(chunk))
250 251 pos = 0
251 252 while pos < len(chunk):
252 253 next = pos + 2 ** 20
253 254 yield chunk[pos:next]
254 255 pos = next
255 256 yield closechunk()
256 257
257 258 def _unpackmanifests(self, repo, revmap, trp, prog, addrevisioncb=None):
258 259 self.callback = prog.increment
259 260 # no need to check for empty manifest group here:
260 261 # if the result of the merge of 1 and 2 is the same in 3 and 4,
261 262 # no new manifest will be created and the manifest group will
262 263 # be empty during the pull
263 264 self.manifestheader()
264 265 deltas = self.deltaiter()
265 266 storage = repo.manifestlog.getstorage(b'')
266 267 storage.addgroup(deltas, revmap, trp, addrevisioncb=addrevisioncb)
267 268 prog.complete()
268 269 self.callback = None
269 270
270 271 def apply(
271 272 self,
272 273 repo,
273 274 tr,
274 275 srctype,
275 276 url,
276 277 targetphase=phases.draft,
277 278 expectedtotal=None,
278 279 sidedata_categories=None,
279 280 ):
280 281 """Add the changegroup returned by source.read() to this repo.
281 282 srctype is a string like 'push', 'pull', or 'unbundle'. url is
282 283 the URL of the repo where this changegroup is coming from.
283 284
284 285 Return an integer summarizing the change to this repo:
285 286 - nothing changed or no source: 0
286 287 - more heads than before: 1+added heads (2..n)
287 288 - fewer heads than before: -1-removed heads (-2..-n)
288 289 - number of heads stays the same: 1
289 290
290 291 `sidedata_categories` is an optional set of the remote's sidedata wanted
291 292 categories.
292 293 """
293 294 repo = repo.unfiltered()
294 295
295 296 # Only useful if we're adding sidedata categories. If both peers have
296 297 # the same categories, then we simply don't do anything.
297 298 adding_sidedata = (
298 299 (
299 300 requirements.REVLOGV2_REQUIREMENT in repo.requirements
300 301 or requirements.CHANGELOGV2_REQUIREMENT in repo.requirements
301 302 )
302 303 and self.version == b'04'
303 304 and srctype == b'pull'
304 305 )
305 306 if adding_sidedata:
306 307 sidedata_helpers = sidedatamod.get_sidedata_helpers(
307 308 repo,
308 309 sidedata_categories or set(),
309 310 pull=True,
310 311 )
311 312 else:
312 313 sidedata_helpers = None
313 314
314 315 def csmap(x):
315 316 repo.ui.debug(b"add changeset %s\n" % short(x))
316 317 return len(cl)
317 318
318 319 def revmap(x):
319 320 return cl.rev(x)
320 321
321 322 try:
322 323 # The transaction may already carry source information. In this
323 324 # case we use the top level data. We overwrite the argument
324 325 # because we need to use the top level values (if they exist)
325 326 # in this function.
326 327 srctype = tr.hookargs.setdefault(b'source', srctype)
327 328 tr.hookargs.setdefault(b'url', url)
328 329 repo.hook(
329 330 b'prechangegroup', throw=True, **pycompat.strkwargs(tr.hookargs)
330 331 )
331 332
332 333 # write changelog data to temp files so concurrent readers
333 334 # will not see an inconsistent view
334 335 cl = repo.changelog
335 336 cl.delayupdate(tr)
336 337 oldheads = set(cl.heads())
337 338
338 339 trp = weakref.proxy(tr)
339 340 # pull off the changeset group
340 341 repo.ui.status(_(b"adding changesets\n"))
341 342 clstart = len(cl)
342 343 progress = repo.ui.makeprogress(
343 344 _(b'changesets'), unit=_(b'chunks'), total=expectedtotal
344 345 )
345 346 self.callback = progress.increment
346 347
347 348 efilesset = set()
348 349 duprevs = []
349 350
350 351 def ondupchangelog(cl, rev):
351 352 if rev < clstart:
352 353 duprevs.append(rev)
353 354
354 355 def onchangelog(cl, rev):
355 356 ctx = cl.changelogrevision(rev)
356 357 efilesset.update(ctx.files)
357 358 repo.register_changeset(rev, ctx)
358 359
359 360 self.changelogheader()
360 361 deltas = self.deltaiter()
361 362 if not cl.addgroup(
362 363 deltas,
363 364 csmap,
364 365 trp,
365 366 alwayscache=True,
366 367 addrevisioncb=onchangelog,
367 368 duplicaterevisioncb=ondupchangelog,
368 369 ):
369 370 repo.ui.develwarn(
370 371 b'applied empty changelog from changegroup',
371 372 config=b'warn-empty-changegroup',
372 373 )
373 374 efiles = len(efilesset)
374 375 clend = len(cl)
375 376 changesets = clend - clstart
376 377 progress.complete()
377 378 del deltas
378 379 # TODO Python 2.7 removal
379 380 # del efilesset
380 381 efilesset = None
381 382 self.callback = None
382 383
383 384 # Keep track of the (non-changelog) revlogs we've updated and their
384 385 # range of new revisions for sidedata rewrite.
385 386 # TODO do something more efficient than keeping the reference to
386 387 # the revlogs, especially memory-wise.
387 388 touched_manifests = {}
388 389 touched_filelogs = {}
389 390
390 391 # pull off the manifest group
391 392 repo.ui.status(_(b"adding manifests\n"))
392 393 # We know that we'll never have more manifests than we had
393 394 # changesets.
394 395 progress = repo.ui.makeprogress(
395 396 _(b'manifests'), unit=_(b'chunks'), total=changesets
396 397 )
397 398 on_manifest_rev = None
398 399 if sidedata_helpers:
399 400 if revlog_constants.KIND_MANIFESTLOG in sidedata_helpers[1]:
400 401
401 402 def on_manifest_rev(manifest, rev):
402 403 range = touched_manifests.get(manifest)
403 404 if not range:
404 405 touched_manifests[manifest] = (rev, rev)
405 406 else:
406 407 assert rev == range[1] + 1
407 408 touched_manifests[manifest] = (range[0], rev)
408 409
409 410 self._unpackmanifests(
410 411 repo,
411 412 revmap,
412 413 trp,
413 414 progress,
414 415 addrevisioncb=on_manifest_rev,
415 416 )
416 417
417 418 needfiles = {}
418 419 if repo.ui.configbool(b'server', b'validate'):
419 420 cl = repo.changelog
420 421 ml = repo.manifestlog
421 422 # validate incoming csets have their manifests
422 423 for cset in pycompat.xrange(clstart, clend):
423 424 mfnode = cl.changelogrevision(cset).manifest
424 425 mfest = ml[mfnode].readdelta()
425 426 # store file nodes we must see
426 427 for f, n in pycompat.iteritems(mfest):
427 428 needfiles.setdefault(f, set()).add(n)
428 429
429 430 on_filelog_rev = None
430 431 if sidedata_helpers:
431 432 if revlog_constants.KIND_FILELOG in sidedata_helpers[1]:
432 433
433 434 def on_filelog_rev(filelog, rev):
434 435 range = touched_filelogs.get(filelog)
435 436 if not range:
436 437 touched_filelogs[filelog] = (rev, rev)
437 438 else:
438 439 assert rev == range[1] + 1
439 440 touched_filelogs[filelog] = (range[0], rev)
440 441
441 442 # process the files
442 443 repo.ui.status(_(b"adding file changes\n"))
443 444 newrevs, newfiles = _addchangegroupfiles(
444 445 repo,
445 446 self,
446 447 revmap,
447 448 trp,
448 449 efiles,
449 450 needfiles,
450 451 addrevisioncb=on_filelog_rev,
451 452 )
452 453
453 454 if sidedata_helpers:
454 455 if revlog_constants.KIND_CHANGELOG in sidedata_helpers[1]:
455 456 cl.rewrite_sidedata(
456 457 trp, sidedata_helpers, clstart, clend - 1
457 458 )
458 459 for mf, (startrev, endrev) in touched_manifests.items():
459 460 mf.rewrite_sidedata(trp, sidedata_helpers, startrev, endrev)
460 461 for fl, (startrev, endrev) in touched_filelogs.items():
461 462 fl.rewrite_sidedata(trp, sidedata_helpers, startrev, endrev)
462 463
463 464 # making sure the value exists
464 465 tr.changes.setdefault(b'changegroup-count-changesets', 0)
465 466 tr.changes.setdefault(b'changegroup-count-revisions', 0)
466 467 tr.changes.setdefault(b'changegroup-count-files', 0)
467 468 tr.changes.setdefault(b'changegroup-count-heads', 0)
468 469
469 470 # some code uses bundle operations for internal purposes. It usually
470 471 # sets `ui.quiet` to do this outside of user sight. Since the report
471 472 # of such operations now happens at the end of the transaction,
472 473 # ui.quiet has no direct effect on the output.
473 474 #
474 475 # To preserve this intent we use an inelegant hack: we fail to report
475 476 # the change if `quiet` is set. We should probably move to
476 477 # something better, but this is a good first step to allow the "end
477 478 # of transaction report" to pass tests.
478 479 if not repo.ui.quiet:
479 480 tr.changes[b'changegroup-count-changesets'] += changesets
480 481 tr.changes[b'changegroup-count-revisions'] += newrevs
481 482 tr.changes[b'changegroup-count-files'] += newfiles
482 483
483 484 deltaheads = 0
484 485 if oldheads:
485 486 heads = cl.heads()
486 487 deltaheads += len(heads) - len(oldheads)
487 488 for h in heads:
488 489 if h not in oldheads and repo[h].closesbranch():
489 490 deltaheads -= 1
490 491
491 492 # see previous comment about checking ui.quiet
492 493 if not repo.ui.quiet:
493 494 tr.changes[b'changegroup-count-heads'] += deltaheads
494 495 repo.invalidatevolatilesets()
495 496
496 497 if changesets > 0:
497 498 if b'node' not in tr.hookargs:
498 499 tr.hookargs[b'node'] = hex(cl.node(clstart))
499 500 tr.hookargs[b'node_last'] = hex(cl.node(clend - 1))
500 501 hookargs = dict(tr.hookargs)
501 502 else:
502 503 hookargs = dict(tr.hookargs)
503 504 hookargs[b'node'] = hex(cl.node(clstart))
504 505 hookargs[b'node_last'] = hex(cl.node(clend - 1))
505 506 repo.hook(
506 507 b'pretxnchangegroup',
507 508 throw=True,
508 509 **pycompat.strkwargs(hookargs)
509 510 )
510 511
511 512 added = pycompat.xrange(clstart, clend)
512 513 phaseall = None
513 514 if srctype in (b'push', b'serve'):
514 515 # Old servers cannot push the boundary themselves.
515 516 # New servers won't push the boundary if the changeset already
516 517 # exists locally as secret
517 518 #
518 519 # We should not use `added` here but the list of all changes in
519 520 # the bundle
520 521 if repo.publishing():
521 522 targetphase = phaseall = phases.public
522 523 else:
523 524 # closer target phase computation
524 525
525 526 # Those changesets have been pushed from the
526 527 # outside; their phases are going to be pushed
527 528 # alongside. Therefore `targetphase` is
528 529 # ignored.
529 530 targetphase = phaseall = phases.draft
530 531 if added:
531 532 phases.registernew(repo, tr, targetphase, added)
532 533 if phaseall is not None:
533 534 if duprevs:
534 535 duprevs.extend(added)
535 536 else:
536 537 duprevs = added
537 538 phases.advanceboundary(repo, tr, phaseall, [], revs=duprevs)
538 539 duprevs = []
539 540
540 541 if changesets > 0:
541 542
542 543 def runhooks(unused_success):
543 544 # These hooks run when the lock releases, not when the
544 545 # transaction closes. So it's possible for the changelog
545 546 # to have changed since we last saw it.
546 547 if clstart >= len(repo):
547 548 return
548 549
549 550 repo.hook(b"changegroup", **pycompat.strkwargs(hookargs))
550 551
551 552 for rev in added:
552 553 args = hookargs.copy()
553 554 args[b'node'] = hex(cl.node(rev))
554 555 del args[b'node_last']
555 556 repo.hook(b"incoming", **pycompat.strkwargs(args))
556 557
557 558 newheads = [h for h in repo.heads() if h not in oldheads]
558 559 repo.ui.log(
559 560 b"incoming",
560 561 b"%d incoming changes - new heads: %s\n",
561 562 len(added),
562 563 b', '.join([hex(c[:6]) for c in newheads]),
563 564 )
564 565
565 566 tr.addpostclose(
566 567 b'changegroup-runhooks-%020i' % clstart,
567 568 lambda tr: repo._afterlock(runhooks),
568 569 )
569 570 finally:
570 571 repo.ui.flush()
571 572 # never return 0 here:
572 573 if deltaheads < 0:
573 574 ret = deltaheads - 1
574 575 else:
575 576 ret = deltaheads + 1
576 577 return ret
577 578
578 579 def deltaiter(self):
579 580 """
580 581 returns an iterator of the deltas in this changegroup
581 582
582 583 Useful for passing to the underlying storage system to be stored.
583 584 """
584 585 chain = None
585 586 for chunkdata in iter(lambda: self.deltachunk(chain), {}):
586 # Chunkdata: (node, p1, p2, cs, deltabase, delta, flags, sidedata)
587 yield chunkdata
587 # Chunkdata: (node, p1, p2, cs, deltabase, delta, flags, sidedata, proto_flags)
588 yield chunkdata[:8]
588 589 chain = chunkdata[0]
589 590
590 591
591 592 class cg2unpacker(cg1unpacker):
592 593 """Unpacker for cg2 streams.
593 594
594 595 cg2 streams add support for generaldelta, so the delta header
595 596 format is slightly different. All other features about the data
596 597 remain the same.
597 598 """
598 599
599 600 deltaheader = _CHANGEGROUPV2_DELTA_HEADER
600 601 deltaheadersize = deltaheader.size
601 602 version = b'02'
602 603
603 604 def _deltaheader(self, headertuple, prevnode):
604 605 node, p1, p2, deltabase, cs = headertuple
605 606 flags = 0
606 607 protocol_flags = 0
607 608 return node, p1, p2, deltabase, cs, flags, protocol_flags
608 609
609 610
610 611 class cg3unpacker(cg2unpacker):
611 612 """Unpacker for cg3 streams.
612 613
613 614 cg3 streams add support for exchanging treemanifests and revlog
614 615 flags. It adds the revlog flags to the delta header and an empty chunk
615 616 separating manifests and files.
616 617 """
617 618
618 619 deltaheader = _CHANGEGROUPV3_DELTA_HEADER
619 620 deltaheadersize = deltaheader.size
620 621 version = b'03'
621 622 _grouplistcount = 2 # One list of manifests and one list of files
622 623
623 624 def _deltaheader(self, headertuple, prevnode):
624 625 node, p1, p2, deltabase, cs, flags = headertuple
625 626 protocol_flags = 0
626 627 return node, p1, p2, deltabase, cs, flags, protocol_flags
627 628
628 629 def _unpackmanifests(self, repo, revmap, trp, prog, addrevisioncb=None):
629 630 super(cg3unpacker, self)._unpackmanifests(
630 631 repo, revmap, trp, prog, addrevisioncb=addrevisioncb
631 632 )
632 633 for chunkdata in iter(self.filelogheader, {}):
633 634 # If we get here, there are directory manifests in the changegroup
634 635 d = chunkdata[b"filename"]
635 636 repo.ui.debug(b"adding %s revisions\n" % d)
636 637 deltas = self.deltaiter()
637 638 if not repo.manifestlog.getstorage(d).addgroup(
638 639 deltas, revmap, trp, addrevisioncb=addrevisioncb
639 640 ):
640 641 raise error.Abort(_(b"received dir revlog group is empty"))
641 642
642 643
643 644 class cg4unpacker(cg3unpacker):
644 645 """Unpacker for cg4 streams.
645 646
646 647 cg4 streams add support for exchanging sidedata.
647 648 """
648 649
649 650 deltaheader = _CHANGEGROUPV4_DELTA_HEADER
650 651 deltaheadersize = deltaheader.size
651 652 version = b'04'
652 653
653 654 def _deltaheader(self, headertuple, prevnode):
654 655 protocol_flags, node, p1, p2, deltabase, cs, flags = headertuple
655 656 return node, p1, p2, deltabase, cs, flags, protocol_flags
656 657
657 658 def deltachunk(self, prevnode):
658 659 res = super(cg4unpacker, self).deltachunk(prevnode)
659 660 if not res:
660 661 return res
661 662
662 (node, p1, p2, cs, deltabase, delta, flags, protocol_flags) = res
663 (
664 node,
665 p1,
666 p2,
667 cs,
668 deltabase,
669 delta,
670 flags,
671 sidedata,
672 protocol_flags,
673 ) = res
674 assert not sidedata
663 675
664 676 sidedata = {}
665 677 if protocol_flags & storageutil.CG_FLAG_SIDEDATA:
666 678 sidedata_raw = getchunk(self._stream)
667 679 sidedata = sidedatamod.deserialize_sidedata(sidedata_raw)
668 680
669 return node, p1, p2, cs, deltabase, delta, flags, sidedata
681 return (
682 node,
683 p1,
684 p2,
685 cs,
686 deltabase,
687 delta,
688 flags,
689 sidedata,
690 protocol_flags,
691 )
670 692
671 693
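The extra chunk read here is written by the producer right after the flagged delta; `_revisiondeltatochunks` further down does essentially this (fragment mirroring that code, wrapped in a hypothetical helper for illustration):

# Rough producer-side counterpart of the sidedata read above: one extra
# length-prefixed chunk following the delta, gated by the same flag.
def emit_sidedata_chunk(delta):
    if delta.protocol_flags & storageutil.CG_FLAG_SIDEDATA:
        raw = delta.sidedata  # already serialized on the revisiondelta
        yield chunkheader(len(raw))
        yield raw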
672 694 class headerlessfixup(object):
673 695 def __init__(self, fh, h):
674 696 self._h = h
675 697 self._fh = fh
676 698
677 699 def read(self, n):
678 700 if self._h:
679 701 d, self._h = self._h[:n], self._h[n:]
680 702 if len(d) < n:
681 703 d += readexactly(self._fh, n - len(d))
682 704 return d
683 705 return readexactly(self._fh, n)
684 706
685 707
686 708 def _revisiondeltatochunks(repo, delta, headerfn):
687 709 """Serialize a revisiondelta to changegroup chunks."""
688 710
689 711 # The captured revision delta may be encoded as a delta against
690 712 # a base revision or as a full revision. The changegroup format
691 713 # requires that everything on the wire be deltas. So for full
692 714 # revisions, we need to invent a header that says to rewrite
693 715 # data.
694 716
695 717 if delta.delta is not None:
696 718 prefix, data = b'', delta.delta
697 719 elif delta.basenode == repo.nullid:
698 720 data = delta.revision
699 721 prefix = mdiff.trivialdiffheader(len(data))
700 722 else:
701 723 data = delta.revision
702 724 prefix = mdiff.replacediffheader(delta.baserevisionsize, len(data))
703 725
704 726 meta = headerfn(delta)
705 727
706 728 yield chunkheader(len(meta) + len(prefix) + len(data))
707 729 yield meta
708 730 if prefix:
709 731 yield prefix
710 732 yield data
711 733
712 734 if delta.protocol_flags & storageutil.CG_FLAG_SIDEDATA:
713 735 # Need a separate chunk for sidedata to be able to differentiate
714 736 # "raw delta" length and sidedata length
715 737 sidedata = delta.sidedata
716 738 yield chunkheader(len(sidedata))
717 739 yield sidedata
718 740
719 741
720 742 def _sortnodesellipsis(store, nodes, cl, lookup):
721 743 """Sort nodes for changegroup generation."""
722 744 # Ellipses serving mode.
723 745 #
724 746 # In a perfect world, we'd generate better ellipsis-ified graphs
725 747 # for non-changelog revlogs. In practice, we haven't started doing
726 748 # that yet, so the resulting DAGs for the manifestlog and filelogs
727 749 # are actually full of bogus parentage on all the ellipsis
728 750 # nodes. This has the side effect that, while the contents are
729 751 # correct, the individual DAGs might be completely out of whack in
730 752 # a case like 882681bc3166 and its ancestors (back about 10
731 753 # revisions or so) in the main hg repo.
732 754 #
733 755 # The one invariant we *know* holds is that the new (potentially
734 756 # bogus) DAG shape will be valid if we order the nodes in the
735 757 # order that they're introduced in dramatis personae by the
736 758 # changelog, so what we do is we sort the non-changelog histories
737 759 # by the order in which they are used by the changelog.
738 760 key = lambda n: cl.rev(lookup(n))
739 761 return sorted(nodes, key=key)
740 762
741 763
742 764 def _resolvenarrowrevisioninfo(
743 765 cl,
744 766 store,
745 767 ischangelog,
746 768 rev,
747 769 linkrev,
748 770 linknode,
749 771 clrevtolocalrev,
750 772 fullclnodes,
751 773 precomputedellipsis,
752 774 ):
753 775 linkparents = precomputedellipsis[linkrev]
754 776
755 777 def local(clrev):
756 778 """Turn a changelog revnum into a local revnum.
757 779
758 780 The ellipsis dag is stored as revnums on the changelog,
759 781 but when we're producing ellipsis entries for
760 782 non-changelog revlogs, we need to turn those numbers into
761 783 something local. This does that for us, and during the
762 784 changelog sending phase will also expand the stored
763 785 mappings as needed.
764 786 """
765 787 if clrev == nullrev:
766 788 return nullrev
767 789
768 790 if ischangelog:
769 791 return clrev
770 792
771 793 # Walk the ellipsis-ized changelog breadth-first looking for a
772 794 # change that has been linked from the current revlog.
773 795 #
774 796 # For a flat manifest revlog only a single step should be necessary
775 797 # as all relevant changelog entries are relevant to the flat
776 798 # manifest.
777 799 #
778 800 # For a filelog or tree manifest dirlog however not every changelog
779 801 # entry will have been relevant, so we need to skip some changelog
780 802 # nodes even after ellipsis-izing.
781 803 walk = [clrev]
782 804 while walk:
783 805 p = walk[0]
784 806 walk = walk[1:]
785 807 if p in clrevtolocalrev:
786 808 return clrevtolocalrev[p]
787 809 elif p in fullclnodes:
788 810 walk.extend([pp for pp in cl.parentrevs(p) if pp != nullrev])
789 811 elif p in precomputedellipsis:
790 812 walk.extend(
791 813 [pp for pp in precomputedellipsis[p] if pp != nullrev]
792 814 )
793 815 else:
794 816 # In this case, we've got an ellipsis with parents
795 817 # outside the current bundle (likely an
796 818 # incremental pull). We "know" that we can use the
797 819 # value of this same revlog at whatever revision
798 820 # is pointed to by linknode. "Know" is in scare
799 821 # quotes because I haven't done enough examination
800 822 # of edge cases to convince myself this is really
801 823 # a fact - it works for all the (admittedly
802 824 # thorough) cases in our testsuite, but I would be
803 825 # somewhat unsurprised to find a case in the wild
804 826 # where this breaks down a bit. That said, I don't
805 827 # know if it would hurt anything.
806 828 for i in pycompat.xrange(rev, 0, -1):
807 829 if store.linkrev(i) == clrev:
808 830 return i
809 831 # We failed to resolve a parent for this node, so
810 832 # we crash the changegroup construction.
811 833 if util.safehasattr(store, 'target'):
812 834 target = store.display_id
813 835 else:
814 836 # some revlog-like stores are not actually revlogs
815 837 target = store._revlog.display_id
816 838
817 839 raise error.Abort(
818 840 b"unable to resolve parent while packing '%s' %r"
819 841 b' for changeset %r' % (target, rev, clrev)
820 842 )
821 843
822 844 return nullrev
823 845
824 846 if not linkparents or (store.parentrevs(rev) == (nullrev, nullrev)):
825 847 p1, p2 = nullrev, nullrev
826 848 elif len(linkparents) == 1:
827 849 (p1,) = sorted(local(p) for p in linkparents)
828 850 p2 = nullrev
829 851 else:
830 852 p1, p2 = sorted(local(p) for p in linkparents)
831 853
832 854 p1node, p2node = store.node(p1), store.node(p2)
833 855
834 856 return p1node, p2node, linknode
835 857
836 858
837 859 def deltagroup(
838 860 repo,
839 861 store,
840 862 nodes,
841 863 ischangelog,
842 864 lookup,
843 865 forcedeltaparentprev,
844 866 topic=None,
845 867 ellipses=False,
846 868 clrevtolocalrev=None,
847 869 fullclnodes=None,
848 870 precomputedellipsis=None,
849 871 sidedata_helpers=None,
850 872 ):
851 873 """Calculate deltas for a set of revisions.
852 874
853 875 Is a generator of ``revisiondelta`` instances.
854 876
855 877 If topic is not None, progress detail will be generated using this
856 878 topic name (e.g. changesets, manifests, etc).
857 879
858 880 See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
859 881 `sidedata_helpers`.
860 882 """
861 883 if not nodes:
862 884 return
863 885
864 886 cl = repo.changelog
865 887
866 888 if ischangelog:
867 889 # `hg log` shows changesets in storage order. To preserve order
868 890 # across clones, send out changesets in storage order.
869 891 nodesorder = b'storage'
870 892 elif ellipses:
871 893 nodes = _sortnodesellipsis(store, nodes, cl, lookup)
872 894 nodesorder = b'nodes'
873 895 else:
874 896 nodesorder = None
875 897
876 898 # Perform ellipses filtering and revision massaging. We do this before
877 899 # emitrevisions() because a) filtering out revisions creates less work
878 900 # for emitrevisions() b) dropping revisions would break emitrevisions()'s
879 901 # assumptions about delta choices and we would possibly send a delta
880 902 # referencing a missing base revision.
881 903 #
882 904 # Also, calling lookup() has side-effects with regards to populating
883 905 # data structures. If we don't call lookup() for each node or if we call
884 906 # lookup() after the first pass through each node, things can break -
885 907 # possibly intermittently depending on the python hash seed! For that
886 908 # reason, we store a mapping of all linknodes during the initial node
887 909 # pass rather than use lookup() on the output side.
888 910 if ellipses:
889 911 filtered = []
890 912 adjustedparents = {}
891 913 linknodes = {}
892 914
893 915 for node in nodes:
894 916 rev = store.rev(node)
895 917 linknode = lookup(node)
896 918 linkrev = cl.rev(linknode)
897 919 clrevtolocalrev[linkrev] = rev
898 920
899 921 # If linknode is in fullclnodes, it means the corresponding
900 922 # changeset was a full changeset and is being sent unaltered.
901 923 if linknode in fullclnodes:
902 924 linknodes[node] = linknode
903 925
904 926 # If the corresponding changeset wasn't in the set computed
905 927 # as relevant to us, it should be dropped outright.
906 928 elif linkrev not in precomputedellipsis:
907 929 continue
908 930
909 931 else:
910 932 # We could probably do this later and avoid the dict
911 933 # holding state. But it likely doesn't matter.
912 934 p1node, p2node, linknode = _resolvenarrowrevisioninfo(
913 935 cl,
914 936 store,
915 937 ischangelog,
916 938 rev,
917 939 linkrev,
918 940 linknode,
919 941 clrevtolocalrev,
920 942 fullclnodes,
921 943 precomputedellipsis,
922 944 )
923 945
924 946 adjustedparents[node] = (p1node, p2node)
925 947 linknodes[node] = linknode
926 948
927 949 filtered.append(node)
928 950
929 951 nodes = filtered
930 952
931 953 # We expect the first pass to be fast, so we only engage the progress
932 954 # meter for constructing the revision deltas.
933 955 progress = None
934 956 if topic is not None:
935 957 progress = repo.ui.makeprogress(
936 958 topic, unit=_(b'chunks'), total=len(nodes)
937 959 )
938 960
939 961 configtarget = repo.ui.config(b'devel', b'bundle.delta')
940 962 if configtarget not in (b'', b'p1', b'full'):
941 963 msg = _(b"""config "devel.bundle.delta" as unknown value: %s""")
942 964 repo.ui.warn(msg % configtarget)
943 965
944 966 deltamode = repository.CG_DELTAMODE_STD
945 967 if forcedeltaparentprev:
946 968 deltamode = repository.CG_DELTAMODE_PREV
947 969 elif configtarget == b'p1':
948 970 deltamode = repository.CG_DELTAMODE_P1
949 971 elif configtarget == b'full':
950 972 deltamode = repository.CG_DELTAMODE_FULL
951 973
952 974 revisions = store.emitrevisions(
953 975 nodes,
954 976 nodesorder=nodesorder,
955 977 revisiondata=True,
956 978 assumehaveparentrevisions=not ellipses,
957 979 deltamode=deltamode,
958 980 sidedata_helpers=sidedata_helpers,
959 981 )
960 982
961 983 for i, revision in enumerate(revisions):
962 984 if progress:
963 985 progress.update(i + 1)
964 986
965 987 if ellipses:
966 988 linknode = linknodes[revision.node]
967 989
968 990 if revision.node in adjustedparents:
969 991 p1node, p2node = adjustedparents[revision.node]
970 992 revision.p1node = p1node
971 993 revision.p2node = p2node
972 994 revision.flags |= repository.REVISION_FLAG_ELLIPSIS
973 995
974 996 else:
975 997 linknode = lookup(revision.node)
976 998
977 999 revision.linknode = linknode
978 1000 yield revision
979 1001
980 1002 if progress:
981 1003 progress.complete()
982 1004
983 1005
984 1006 class cgpacker(object):
985 1007 def __init__(
986 1008 self,
987 1009 repo,
988 1010 oldmatcher,
989 1011 matcher,
990 1012 version,
991 1013 builddeltaheader,
992 1014 manifestsend,
993 1015 forcedeltaparentprev=False,
994 1016 bundlecaps=None,
995 1017 ellipses=False,
996 1018 shallow=False,
997 1019 ellipsisroots=None,
998 1020 fullnodes=None,
999 1021 remote_sidedata=None,
1000 1022 ):
1001 1023 """Given a source repo, construct a bundler.
1002 1024
1003 1025 oldmatcher is a matcher that matches on files the client already has.
1004 1026 These will not be included in the changegroup.
1005 1027
1006 1028 matcher is a matcher that matches on files to include in the
1007 1029 changegroup. Used to facilitate sparse changegroups.
1008 1030
1009 1031 forcedeltaparentprev indicates whether delta parents must be against
1010 1032 the previous revision in a delta group. This should only be used for
1011 1033 compatibility with changegroup version 1.
1012 1034
1013 1035 builddeltaheader is a callable that constructs the header for a group
1014 1036 delta.
1015 1037
1016 1038 manifestsend is a chunk to send after manifests have been fully emitted.
1017 1039
1018 1040 ellipses indicates whether ellipsis serving mode is enabled.
1019 1041
1020 1042 bundlecaps is optional and can be used to specify the set of
1021 1043 capabilities which can be used to build the bundle. While bundlecaps is
1022 1044 unused in core Mercurial, extensions rely on this feature to communicate
1023 1045 capabilities to customize the changegroup packer.
1024 1046
1025 1047 shallow indicates whether shallow data might be sent. The packer may
1026 1048 need to pack file contents not introduced by the changes being packed.
1027 1049
1028 1050 fullnodes is the set of changelog nodes which should not be ellipsis
1029 1051 nodes. We store this rather than the set of nodes that should be
1030 1052 ellipsis because for very large histories we expect this to be
1031 1053 significantly smaller.
1032 1054
1033 1055 remote_sidedata is the set of sidedata categories wanted by the remote.
1034 1056 """
1035 1057 assert oldmatcher
1036 1058 assert matcher
1037 1059 self._oldmatcher = oldmatcher
1038 1060 self._matcher = matcher
1039 1061
1040 1062 self.version = version
1041 1063 self._forcedeltaparentprev = forcedeltaparentprev
1042 1064 self._builddeltaheader = builddeltaheader
1043 1065 self._manifestsend = manifestsend
1044 1066 self._ellipses = ellipses
1045 1067
1046 1068 # Set of capabilities we can use to build the bundle.
1047 1069 if bundlecaps is None:
1048 1070 bundlecaps = set()
1049 1071 self._bundlecaps = bundlecaps
1050 1072 if remote_sidedata is None:
1051 1073 remote_sidedata = set()
1052 1074 self._remote_sidedata = remote_sidedata
1053 1075 self._isshallow = shallow
1054 1076 self._fullclnodes = fullnodes
1055 1077
1056 1078 # Maps ellipsis revs to their roots at the changelog level.
1057 1079 self._precomputedellipsis = ellipsisroots
1058 1080
1059 1081 self._repo = repo
1060 1082
1061 1083 if self._repo.ui.verbose and not self._repo.ui.debugflag:
1062 1084 self._verbosenote = self._repo.ui.note
1063 1085 else:
1064 1086 self._verbosenote = lambda s: None
1065 1087
1066 1088 def generate(
1067 1089 self, commonrevs, clnodes, fastpathlinkrev, source, changelog=True
1068 1090 ):
1069 1091 """Yield a sequence of changegroup byte chunks.
1070 1092 If changelog is False, changelog data won't be added to changegroup
1071 1093 """
1072 1094
1073 1095 repo = self._repo
1074 1096 cl = repo.changelog
1075 1097
1076 1098 self._verbosenote(_(b'uncompressed size of bundle content:\n'))
1077 1099 size = 0
1078 1100
1079 1101 sidedata_helpers = None
1080 1102 if self.version == b'04':
1081 1103 remote_sidedata = self._remote_sidedata
1082 1104 if source == b'strip':
1083 1105 # We're our own remote when stripping, get the no-op helpers
1084 1106 # TODO a better approach would be for the strip bundle to
1085 1107 # correctly advertise its sidedata categories directly.
1086 1108 remote_sidedata = repo._wanted_sidedata
1087 1109 sidedata_helpers = sidedatamod.get_sidedata_helpers(
1088 1110 repo, remote_sidedata
1089 1111 )
1090 1112
1091 1113 clstate, deltas = self._generatechangelog(
1092 1114 cl,
1093 1115 clnodes,
1094 1116 generate=changelog,
1095 1117 sidedata_helpers=sidedata_helpers,
1096 1118 )
1097 1119 for delta in deltas:
1098 1120 for chunk in _revisiondeltatochunks(
1099 1121 self._repo, delta, self._builddeltaheader
1100 1122 ):
1101 1123 size += len(chunk)
1102 1124 yield chunk
1103 1125
1104 1126 close = closechunk()
1105 1127 size += len(close)
1106 1128 yield closechunk()
1107 1129
1108 1130 self._verbosenote(_(b'%8.i (changelog)\n') % size)
1109 1131
1110 1132 clrevorder = clstate[b'clrevorder']
1111 1133 manifests = clstate[b'manifests']
1112 1134 changedfiles = clstate[b'changedfiles']
1113 1135
1114 1136 # We need to make sure that the linkrev in the changegroup refers to
1115 1137 # the first changeset that introduced the manifest or file revision.
1116 1138 # The fastpath is usually safer than the slowpath, because the filelogs
1117 1139 # are walked in revlog order.
1118 1140 #
1119 1141 # When taking the slowpath when the manifest revlog uses generaldelta,
1120 1142 # the manifest may be walked in the "wrong" order. Without 'clrevorder',
1121 1143 # we would get an incorrect linkrev (see fix in cc0ff93d0c0c).
1122 1144 #
1123 1145 # When taking the fastpath, we are only vulnerable to reordering
1124 1146 # of the changelog itself. The changelog never uses generaldelta and is
1125 1147 # never reordered. To handle this case, we simply take the slowpath,
1126 1148 # which already has the 'clrevorder' logic. This was also fixed in
1127 1149 # cc0ff93d0c0c.
1128 1150
1129 1151 # Treemanifests don't work correctly with fastpathlinkrev
1130 1152 # either, because we don't discover which directory nodes to
1131 1153 # send along with files. This could probably be fixed.
1132 1154 fastpathlinkrev = fastpathlinkrev and not scmutil.istreemanifest(repo)
1133 1155
1134 1156 fnodes = {} # needed file nodes
1135 1157
1136 1158 size = 0
1137 1159 it = self.generatemanifests(
1138 1160 commonrevs,
1139 1161 clrevorder,
1140 1162 fastpathlinkrev,
1141 1163 manifests,
1142 1164 fnodes,
1143 1165 source,
1144 1166 clstate[b'clrevtomanifestrev'],
1145 1167 sidedata_helpers=sidedata_helpers,
1146 1168 )
1147 1169
1148 1170 for tree, deltas in it:
1149 1171 if tree:
1150 1172 assert self.version in (b'03', b'04')
1151 1173 chunk = _fileheader(tree)
1152 1174 size += len(chunk)
1153 1175 yield chunk
1154 1176
1155 1177 for delta in deltas:
1156 1178 chunks = _revisiondeltatochunks(
1157 1179 self._repo, delta, self._builddeltaheader
1158 1180 )
1159 1181 for chunk in chunks:
1160 1182 size += len(chunk)
1161 1183 yield chunk
1162 1184
1163 1185 close = closechunk()
1164 1186 size += len(close)
1165 1187 yield close
1166 1188
1167 1189 self._verbosenote(_(b'%8.i (manifests)\n') % size)
1168 1190 yield self._manifestsend
1169 1191
1170 1192 mfdicts = None
1171 1193 if self._ellipses and self._isshallow:
1172 1194 mfdicts = [
1173 1195 (repo.manifestlog[n].read(), lr)
1174 1196 for (n, lr) in pycompat.iteritems(manifests)
1175 1197 ]
1176 1198
1177 1199 manifests.clear()
1178 1200 clrevs = {cl.rev(x) for x in clnodes}
1179 1201
1180 1202 it = self.generatefiles(
1181 1203 changedfiles,
1182 1204 commonrevs,
1183 1205 source,
1184 1206 mfdicts,
1185 1207 fastpathlinkrev,
1186 1208 fnodes,
1187 1209 clrevs,
1188 1210 sidedata_helpers=sidedata_helpers,
1189 1211 )
1190 1212
1191 1213 for path, deltas in it:
1192 1214 h = _fileheader(path)
1193 1215 size = len(h)
1194 1216 yield h
1195 1217
1196 1218 for delta in deltas:
1197 1219 chunks = _revisiondeltatochunks(
1198 1220 self._repo, delta, self._builddeltaheader
1199 1221 )
1200 1222 for chunk in chunks:
1201 1223 size += len(chunk)
1202 1224 yield chunk
1203 1225
1204 1226 close = closechunk()
1205 1227 size += len(close)
1206 1228 yield close
1207 1229
1208 1230 self._verbosenote(_(b'%8.i %s\n') % (size, path))
1209 1231
1210 1232 yield closechunk()
1211 1233
1212 1234 if clnodes:
1213 1235 repo.hook(b'outgoing', node=hex(clnodes[0]), source=source)
1214 1236
1215 1237 def _generatechangelog(
1216 1238 self, cl, nodes, generate=True, sidedata_helpers=None
1217 1239 ):
1218 1240 """Generate data for changelog chunks.
1219 1241
1220 1242 Returns a 2-tuple of a dict containing state and an iterable of
1221 1243 byte chunks. The state will not be fully populated until the
1222 1244 chunk stream has been fully consumed.
1223 1245
1224 1246 if generate is False, the state will be fully populated and no chunk
1225 1247 stream will be yielded
1226 1248
1227 1249 See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
1228 1250 `sidedata_helpers`.
1229 1251 """
1230 1252 clrevorder = {}
1231 1253 manifests = {}
1232 1254 mfl = self._repo.manifestlog
1233 1255 changedfiles = set()
1234 1256 clrevtomanifestrev = {}
1235 1257
1236 1258 state = {
1237 1259 b'clrevorder': clrevorder,
1238 1260 b'manifests': manifests,
1239 1261 b'changedfiles': changedfiles,
1240 1262 b'clrevtomanifestrev': clrevtomanifestrev,
1241 1263 }
1242 1264
        if not (generate or self._ellipses):
            # sort the nodes in storage order
            nodes = sorted(nodes, key=cl.rev)
            for node in nodes:
                c = cl.changelogrevision(node)
                clrevorder[node] = len(clrevorder)
                # record the first changeset introducing this manifest version
                manifests.setdefault(c.manifest, node)
                # Record a complete list of potentially-changed files in
                # this manifest.
                changedfiles.update(c.files)

            return state, ()

        # Callback for the changelog, used to collect changed files and
        # manifest nodes.
        # Returns the linkrev node (identity in the changelog case).
        def lookupcl(x):
            c = cl.changelogrevision(x)
            clrevorder[x] = len(clrevorder)

            if self._ellipses:
                # Only update manifests if x is going to be sent. Otherwise we
                # end up with bogus linkrevs specified for manifests and
                # we skip some manifest nodes that we should otherwise
                # have sent.
                if (
                    x in self._fullclnodes
                    or cl.rev(x) in self._precomputedellipsis
                ):
                    manifestnode = c.manifest
                    # Record the first changeset introducing this manifest
                    # version.
                    manifests.setdefault(manifestnode, x)
                    # Set this narrow-specific dict so we have the lowest
                    # manifest revnum to look up for this cl revnum. (Part of
                    # mapping changelog ellipsis parents to manifest ellipsis
                    # parents)
                    clrevtomanifestrev.setdefault(
                        cl.rev(x), mfl.rev(manifestnode)
                    )
                # We can't trust the changed files list in the changeset if the
                # client requested a shallow clone.
                if self._isshallow:
                    changedfiles.update(mfl[c.manifest].read().keys())
                else:
                    changedfiles.update(c.files)
            else:
                # record the first changeset introducing this manifest version
                manifests.setdefault(c.manifest, x)
                # Record a complete list of potentially-changed files in
                # this manifest.
                changedfiles.update(c.files)

            return x

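        # The positional True marks this group as the changelog (the
        # deltagroup() calls for manifests and files below pass False), and
        # lookupcl doubles as the per-node callback that fills in the state
        # collected above.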
        gen = deltagroup(
            self._repo,
            cl,
            nodes,
            True,
            lookupcl,
            self._forcedeltaparentprev,
            ellipses=self._ellipses,
            topic=_(b'changesets'),
            clrevtolocalrev={},
            fullclnodes=self._fullclnodes,
            precomputedellipsis=self._precomputedellipsis,
            sidedata_helpers=sidedata_helpers,
        )

        return state, gen

    def generatemanifests(
        self,
        commonrevs,
        clrevorder,
        fastpathlinkrev,
        manifests,
        fnodes,
        source,
        clrevtolocalrev,
        sidedata_helpers=None,
    ):
        """Returns an iterator of changegroup chunks containing manifests.

        `source` is unused here, but is used by extensions like remotefilelog
        to change what is sent depending on pulls vs. pushes, etc.

        See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
        `sidedata_helpers`.
        """
        repo = self._repo
        mfl = repo.manifestlog
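        # Worklist of manifest nodes to send, keyed by tree path. The root
        # manifest lives under the empty key; subdirectory (tree) manifest
        # nodes are added as lookupmflinknode discovers them.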
        tmfnodes = {b'': manifests}

        # Callback for the manifest, used to collect linkrevs for filelog
        # revisions.
        # Returns the linkrev node (collected in lookupcl).
        def makelookupmflinknode(tree, nodes):
            if fastpathlinkrev:
                assert not tree

                # pytype: disable=unsupported-operands
                return manifests.__getitem__
                # pytype: enable=unsupported-operands

            def lookupmflinknode(x):
                """Callback for looking up the linknode for manifests.

                Returns the linkrev node for the specified manifest.

                SIDE EFFECT:

                1) fclnodes gets populated with the list of relevant
                   file nodes if we're not using fastpathlinkrev
                2) When treemanifests are in use, collects treemanifest nodes
                   to send

                Note that this means manifests must be completely sent to
                the client before you can trust the list of files and
                treemanifests to send.
                """
                clnode = nodes[x]
                mdata = mfl.get(tree, x).readfast(shallow=True)
                for p, n, fl in mdata.iterentries():
                    if fl == b't':  # subdirectory manifest
                        subtree = tree + p + b'/'
                        tmfclnodes = tmfnodes.setdefault(subtree, {})
                        tmfclnode = tmfclnodes.setdefault(n, clnode)
                        if clrevorder[clnode] < clrevorder[tmfclnode]:
                            tmfclnodes[n] = clnode
                    else:
                        f = tree + p
                        fclnodes = fnodes.setdefault(f, {})
                        fclnode = fclnodes.setdefault(n, clnode)
                        if clrevorder[clnode] < clrevorder[fclnode]:
                            fclnodes[n] = clnode
                return clnode

            return lookupmflinknode

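        # Drain the worklist. Consuming a tree's deltas can add new subtrees
        # to tmfnodes (via lookupfn), so this is more than a simple iteration.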
        while tmfnodes:
            tree, nodes = tmfnodes.popitem()

            should_visit = self._matcher.visitdir(tree[:-1])
            if tree and not should_visit:
                continue

            store = mfl.getstorage(tree)

            if not should_visit:
                # No nodes to send because this directory is out of
                # the client's view of the repository (probably
                # because of narrow clones). Do this even for the root
                # directory (tree=='')
                prunednodes = []
            else:
                # Avoid sending any manifest nodes we can prove the
                # client already has by checking linkrevs. See the
                # related comment in generatefiles().
                prunednodes = self._prunemanifests(store, nodes, commonrevs)

            if tree and not prunednodes:
                continue

            lookupfn = makelookupmflinknode(tree, nodes)

            deltas = deltagroup(
                self._repo,
                store,
                prunednodes,
                False,
                lookupfn,
                self._forcedeltaparentprev,
                ellipses=self._ellipses,
                topic=_(b'manifests'),
                clrevtolocalrev=clrevtolocalrev,
                fullclnodes=self._fullclnodes,
                precomputedellipsis=self._precomputedellipsis,
                sidedata_helpers=sidedata_helpers,
            )

            if not self._oldmatcher.visitdir(store.tree[:-1]):
                yield tree, deltas
            else:
                # 'deltas' is a generator and we need to consume it even if
                # we are not going to send it, because a side effect is that
                # it updates tmfnodes (via lookupfn)
                for d in deltas:
                    pass
                if not tree:
                    yield tree, []

    def _prunemanifests(self, store, nodes, commonrevs):
        if not self._ellipses:
            # In the non-ellipses case, on large repositories it is cheaper
            # to skip calling store.rev and store.linkrev on a lot of nodes,
            # even at the cost of sending some extra data.
            return nodes.copy()
        # This is split out as a separate method to allow filtering
        # commonrevs in extension code.
        #
        # TODO(augie): this shouldn't be required, instead we should
        # make filtering of revisions to send delegated to the store
        # layer.
        frev, flr = store.rev, store.linkrev
        return [n for n in nodes if flr(frev(n)) not in commonrevs]

    # The 'source' parameter is useful for extensions
    def generatefiles(
        self,
        changedfiles,
        commonrevs,
        source,
        mfdicts,
        fastpathlinkrev,
        fnodes,
        clrevs,
        sidedata_helpers=None,
    ):
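        # Only send files selected by the new matcher but not by the old one,
        # e.g. files that become visible when a narrow clone is widened.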
        changedfiles = [
            f
            for f in changedfiles
            if self._matcher(f) and not self._oldmatcher(f)
        ]

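        # Two ways to map file nodes to linkrev nodes: the slow path uses the
        # fnodes table collected while the manifests were generated, while the
        # fast path reads linkrevs directly off the filelog.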
        if not fastpathlinkrev:

            def normallinknodes(unused, fname):
                return fnodes.get(fname, {})

        else:
            cln = self._repo.changelog.node

            def normallinknodes(store, fname):
                flinkrev = store.linkrev
                fnode = store.node
                revs = ((r, flinkrev(r)) for r in store)
                return {fnode(r): cln(lr) for r, lr in revs if lr in clrevs}

        clrevtolocalrev = {}

        if self._isshallow:
            # In a shallow clone, the linknodes callback needs to also include
            # those file nodes that are in the manifests we sent but weren't
            # introduced by those manifests.
            commonctxs = [self._repo[c] for c in commonrevs]
            clrev = self._repo.changelog.rev

            def linknodes(flog, fname):
                for c in commonctxs:
                    try:
                        fnode = c.filenode(fname)
                        clrevtolocalrev[c.rev()] = flog.rev(fnode)
                    except error.ManifestLookupError:
                        pass
                links = normallinknodes(flog, fname)
                if len(links) != len(mfdicts):
                    for mf, lr in mfdicts:
                        fnode = mf.get(fname, None)
                        if fnode in links:
                            links[fnode] = min(links[fnode], lr, key=clrev)
                        elif fnode:
                            links[fnode] = lr
                return links

        else:
            linknodes = normallinknodes

        repo = self._repo
        progress = repo.ui.makeprogress(
            _(b'files'), unit=_(b'files'), total=len(changedfiles)
        )
        for i, fname in enumerate(sorted(changedfiles)):
            filerevlog = repo.file(fname)
            if not filerevlog:
                raise error.Abort(
                    _(b"empty or missing file data for %s") % fname
                )

            clrevtolocalrev.clear()

            linkrevnodes = linknodes(filerevlog, fname)

            # Lookup table for filenodes; we collected the linkrev nodes
            # above in the fastpath case and with lookupmf in the slowpath
            # case.
            def lookupfilelog(x):
                return linkrevnodes[x]

            frev, flr = filerevlog.rev, filerevlog.linkrev
            # Skip sending any filenode we know the client already
            # has. This avoids over-sending files relatively
            # inexpensively, so it's not a problem if we under-filter
            # here.
            filenodes = [
                n for n in linkrevnodes if flr(frev(n)) not in commonrevs
            ]

            if not filenodes:
                continue

            progress.update(i + 1, item=fname)

            deltas = deltagroup(
                self._repo,
                filerevlog,
                filenodes,
                False,
                lookupfilelog,
                self._forcedeltaparentprev,
                ellipses=self._ellipses,
                clrevtolocalrev=clrevtolocalrev,
                fullclnodes=self._fullclnodes,
                precomputedellipsis=self._precomputedellipsis,
                sidedata_helpers=sidedata_helpers,
            )

            yield fname, deltas

        progress.complete()


def _makecg1packer(
    repo,
    oldmatcher,
    matcher,
    bundlecaps,
    ellipses=False,
    shallow=False,
    ellipsisroots=None,
    fullnodes=None,
    remote_sidedata=None,
):
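    # The cg1 delta header carries no explicit delta base: deltas are always
    # computed against the previously sent revision, hence the
    # forcedeltaparentprev=True below.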
    builddeltaheader = lambda d: _CHANGEGROUPV1_DELTA_HEADER.pack(
        d.node, d.p1node, d.p2node, d.linknode
    )

    return cgpacker(
        repo,
        oldmatcher,
        matcher,
        b'01',
        builddeltaheader=builddeltaheader,
        manifestsend=b'',
        forcedeltaparentprev=True,
        bundlecaps=bundlecaps,
        ellipses=ellipses,
        shallow=shallow,
        ellipsisroots=ellipsisroots,
        fullnodes=fullnodes,
    )


def _makecg2packer(
    repo,
    oldmatcher,
    matcher,
    bundlecaps,
    ellipses=False,
    shallow=False,
    ellipsisroots=None,
    fullnodes=None,
    remote_sidedata=None,
):
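    # cg2 extends the cg1 header with an explicit delta base node, which is
    # what makes exchanging generaldelta-style deltas possible.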
    builddeltaheader = lambda d: _CHANGEGROUPV2_DELTA_HEADER.pack(
        d.node, d.p1node, d.p2node, d.basenode, d.linknode
    )

    return cgpacker(
        repo,
        oldmatcher,
        matcher,
        b'02',
        builddeltaheader=builddeltaheader,
        manifestsend=b'',
        bundlecaps=bundlecaps,
        ellipses=ellipses,
        shallow=shallow,
        ellipsisroots=ellipsisroots,
        fullnodes=fullnodes,
    )


def _makecg3packer(
    repo,
    oldmatcher,
    matcher,
    bundlecaps,
    ellipses=False,
    shallow=False,
    ellipsisroots=None,
    fullnodes=None,
    remote_sidedata=None,
):
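    # cg3 additionally carries revlog flags in the delta header, and ends the
    # manifest section with an extra close chunk to delimit tree manifest
    # groups.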
    builddeltaheader = lambda d: _CHANGEGROUPV3_DELTA_HEADER.pack(
        d.node, d.p1node, d.p2node, d.basenode, d.linknode, d.flags
    )

    return cgpacker(
        repo,
        oldmatcher,
        matcher,
        b'03',
        builddeltaheader=builddeltaheader,
        manifestsend=closechunk(),
        bundlecaps=bundlecaps,
        ellipses=ellipses,
        shallow=shallow,
        ellipsisroots=ellipsisroots,
        fullnodes=fullnodes,
    )


def _makecg4packer(
    repo,
    oldmatcher,
    matcher,
    bundlecaps,
    ellipses=False,
    shallow=False,
    ellipsisroots=None,
    fullnodes=None,
    remote_sidedata=None,
):
    # Sidedata is in a separate chunk from the delta to differentiate
    # "raw delta" and sidedata.
    def builddeltaheader(d):
        return _CHANGEGROUPV4_DELTA_HEADER.pack(
            d.protocol_flags,
            d.node,
            d.p1node,
            d.p2node,
            d.basenode,
            d.linknode,
            d.flags,
        )

    return cgpacker(
        repo,
        oldmatcher,
        matcher,
        b'04',
        builddeltaheader=builddeltaheader,
        manifestsend=closechunk(),
        bundlecaps=bundlecaps,
        ellipses=ellipses,
        shallow=shallow,
        ellipsisroots=ellipsisroots,
        fullnodes=fullnodes,
        remote_sidedata=remote_sidedata,
    )


_packermap = {
    b'01': (_makecg1packer, cg1unpacker),
    # cg2 adds support for exchanging generaldelta
    b'02': (_makecg2packer, cg2unpacker),
    # cg3 adds support for exchanging revlog flags and treemanifests
    b'03': (_makecg3packer, cg3unpacker),
    # cg4 adds support for exchanging sidedata
    b'04': (_makecg4packer, cg4unpacker),
}


def allsupportedversions(repo):
    versions = set(_packermap.keys())
    needv03 = False
    if (
        repo.ui.configbool(b'experimental', b'changegroup3')
        or repo.ui.configbool(b'experimental', b'treemanifest')
        or scmutil.istreemanifest(repo)
    ):
        # we keep version 03 because we need it to exchange treemanifest data
        #
        # we also keep versions 01 and 02, because it is possible for a repo
        # to contain both normal and tree manifests at the same time, so
        # using an older version to pull data remains viable
        #
        # (or even to push a subset of history)
        needv03 = True
    if not needv03:
        versions.discard(b'03')
    want_v4 = (
        repo.ui.configbool(b'experimental', b'changegroup4')
        or requirements.REVLOGV2_REQUIREMENT in repo.requirements
        or requirements.CHANGELOGV2_REQUIREMENT in repo.requirements
    )
    if not want_v4:
        versions.discard(b'04')
    return versions


# Changegroup versions that can be applied to the repo
def supportedincomingversions(repo):
    return allsupportedversions(repo)


# Changegroup versions that can be created from the repo
def supportedoutgoingversions(repo):
    versions = allsupportedversions(repo)
    if scmutil.istreemanifest(repo):
        # Versions 01 and 02 support only flat manifests and it's just too
        # expensive to convert between the flat manifest and tree manifest on
        # the fly. Since tree manifests are hashed differently, all of history
        # would have to be converted. Instead, we simply don't even pretend to
        # support versions 01 and 02.
        versions.discard(b'01')
        versions.discard(b'02')
    if requirements.NARROW_REQUIREMENT in repo.requirements:
        # Versions 01 and 02 don't support revlog flags, and we need to
        # support that for stripping and unbundling to work.
        versions.discard(b'01')
        versions.discard(b'02')
    if LFS_REQUIREMENT in repo.requirements:
        # Versions 01 and 02 don't support revlog flags, and we need to
        # mark LFS entries with REVIDX_EXTSTORED.
        versions.discard(b'01')
        versions.discard(b'02')

    return versions


def localversion(repo):
    # Finds the best version to use for bundles that are meant to be used
    # locally, such as those from strip and shelve, and temporary bundles.
    return max(supportedoutgoingversions(repo))


def safeversion(repo):
    # Finds the smallest version that it's safe to assume clients of the repo
    # will support. For example, all hg versions that support generaldelta also
    # support changegroup 02.
    versions = supportedoutgoingversions(repo)
    if requirements.GENERALDELTA_REQUIREMENT in repo.requirements:
        versions.discard(b'01')
    assert versions
    return min(versions)


def getbundler(
    version,
    repo,
    bundlecaps=None,
    oldmatcher=None,
    matcher=None,
    ellipses=False,
    shallow=False,
    ellipsisroots=None,
    fullnodes=None,
    remote_sidedata=None,
):
    assert version in supportedoutgoingversions(repo)

    if matcher is None:
        matcher = matchmod.always()
    if oldmatcher is None:
        oldmatcher = matchmod.never()

    if version == b'01' and not matcher.always():
        raise error.ProgrammingError(
            b'version 01 changegroups do not support sparse file matchers'
        )

    if ellipses and version in (b'01', b'02'):
        raise error.Abort(
            _(
                b'ellipsis nodes require at least cg3 on client and server, '
                b'but negotiated version %s'
            )
            % version
        )

    # Requested files could include files not in the local store. So
    # filter those out.
    matcher = repo.narrowmatch(matcher)

    fn = _packermap[version][0]
    return fn(
        repo,
        oldmatcher,
        matcher,
        bundlecaps,
        ellipses=ellipses,
        shallow=shallow,
        ellipsisroots=ellipsisroots,
        fullnodes=fullnodes,
        remote_sidedata=remote_sidedata,
    )


def getunbundler(version, fh, alg, extras=None):
    return _packermap[version][1](fh, alg, extras=extras)


def _changegroupinfo(repo, nodes, source):
    if repo.ui.verbose or source == b'bundle':
        repo.ui.status(_(b"%d changesets found\n") % len(nodes))
    if repo.ui.debugflag:
        repo.ui.debug(b"list of changesets:\n")
        for node in nodes:
            repo.ui.debug(b"%s\n" % hex(node))


def makechangegroup(
    repo, outgoing, version, source, fastpath=False, bundlecaps=None
):
    cgstream = makestream(
        repo,
        outgoing,
        version,
        source,
        fastpath=fastpath,
        bundlecaps=bundlecaps,
    )
    return getunbundler(
        version,
        util.chunkbuffer(cgstream),
        None,
        {b'clcount': len(outgoing.missing)},
    )


def makestream(
    repo,
    outgoing,
    version,
    source,
    fastpath=False,
    bundlecaps=None,
    matcher=None,
    remote_sidedata=None,
):
    bundler = getbundler(
        version,
        repo,
        bundlecaps=bundlecaps,
        matcher=matcher,
        remote_sidedata=remote_sidedata,
    )

    repo = repo.unfiltered()
    commonrevs = outgoing.common
    csets = outgoing.missing
    heads = outgoing.ancestorsof
    # We go through the fast path if we get told to, or if all (unfiltered)
    # heads have been requested (since we then know all linkrevs will
    # be pulled by the client).
    heads.sort()
    fastpathlinkrev = fastpath or (
        repo.filtername is None and heads == sorted(repo.heads())
    )

    repo.hook(b'preoutgoing', throw=True, source=source)
    _changegroupinfo(repo, csets, source)
    return bundler.generate(commonrevs, csets, fastpathlinkrev, source)


def _addchangegroupfiles(
    repo,
    source,
    revmap,
    trp,
    expectedfiles,
    needfiles,
    addrevisioncb=None,
):
    revisions = 0
    files = 0
    progress = repo.ui.makeprogress(
        _(b'files'), unit=_(b'files'), total=expectedfiles
    )
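    # iter(callable, sentinel): keep reading filelog headers until the source
    # returns an empty dict, which marks the end of the files section.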
    for chunkdata in iter(source.filelogheader, {}):
        files += 1
        f = chunkdata[b"filename"]
        repo.ui.debug(b"adding %s revisions\n" % f)
        progress.increment()
        fl = repo.file(f)
        o = len(fl)
        try:
            deltas = source.deltaiter()
            added = fl.addgroup(
                deltas,
                revmap,
                trp,
                addrevisioncb=addrevisioncb,
            )
            if not added:
                raise error.Abort(_(b"received file revlog group is empty"))
        except error.CensoredBaseError as e:
            raise error.Abort(_(b"received delta base is censored: %s") % e)
        revisions += len(fl) - o
        if f in needfiles:
            needs = needfiles[f]
            for new in pycompat.xrange(o, len(fl)):
                n = fl.node(new)
                if n in needs:
                    needs.remove(n)
                else:
                    raise error.Abort(_(b"received spurious file revlog entry"))
            if not needs:
                del needfiles[f]
    progress.complete()

    for f, needs in pycompat.iteritems(needfiles):
        fl = repo.file(f)
        for n in needs:
            try:
                fl.rev(n)
            except error.LookupError:
                raise error.Abort(
                    _(b'missing file data for %s:%s - run hg verify')
                    % (f, hex(n))
                )

    return revisions, files