# HG changeset patch # User Durham Goode # Date 2017-09-07 01:33:55 # Node ID bbdca7e460c0d9e332c733b48715d81f7a565e3f # Parent a763c891f36e55f4869f443c220227d1da747d18 changegroup: fix to allow empty manifest parts The current chunk reading algorithm relied on counting the number of empty chunks and comparing it to the number of chunk lists it expected (1 list of files for cg1 and cg2, and 1 list of files + 1 list of trees for cg3). This implicitly assumed that both the changelog part and the manifestlog part were never empty (since them being empty would cause it to count it as one list being done, and screw up the count). In our treemanifest code, the manifest section could be empty, so we need to handle that case. This patch refactors that code to be more explicit about how it counts the expected parts. Differential Revision: https://phab.mercurial-scm.org/D646 diff --git a/mercurial/changegroup.py b/mercurial/changegroup.py --- a/mercurial/changegroup.py +++ b/mercurial/changegroup.py @@ -199,23 +199,36 @@ class cg1unpacker(object): network API. To do so, it parse the changegroup data, otherwise it will block in case of sshrepo because it don't know the end of the stream. """ - # an empty chunkgroup is the end of the changegroup - # a changegroup has at least 2 chunkgroups (changelog and manifest). - # after that, changegroup versions 1 and 2 have a series of groups - # with one group per file. changegroup 3 has a series of directory - # manifests before the files. - count = 0 - emptycount = 0 - while emptycount < self._grouplistcount: - empty = True - count += 1 + # For changegroup 1 and 2, we expect 3 parts: changelog, manifestlog, + # and a list of filelogs. For changegroup 3, we expect 4 parts: + # changelog, manifestlog, a list of tree manifestlogs, and a list of + # filelogs. + # + # Changelog and manifestlog parts are terminated with empty chunks. The + # tree and file parts are a list of entry sections. 
Each entry section + # is a series of chunks terminating in an empty chunk. The list of these + # entry sections is terminated in yet another empty chunk, so we know + # we've reached the end of the tree/file list when we reach an empty + # chunk that was preceded by no non-empty chunks. + + parts = 0 + while parts < 2 + self._grouplistcount: + noentries = True while True: chunk = getchunk(self) if not chunk: - if empty and count > 2: - emptycount += 1 + # The first two empty chunks represent the end of the + # changelog and the manifestlog portions. The remaining + # empty chunks represent either A) the end of individual + # tree or file entries in the file list, or B) the end of + # the entire list. It's the end of the entire list if there + # were no entries (i.e. noentries is True). + if parts < 2: + parts += 1 + elif noentries: + parts += 1 break - empty = False + noentries = False yield chunkheader(len(chunk)) pos = 0 while pos < len(chunk):