upstream/mercurial-mirror Files · mercurial/dirstateutils/v2.py

exchange: improve computation of relevant markers for large repos...

exchange: improve computation of relevant markers for large repos Compute the candidate nodes with relevant markers directly from keys of the predecessors/successors/children dictionaries of obsstore. This is faster than iterating over all nodes directly. This test could be further improved for repositories with relative few markers compared to the repository size, but this is no longer hot already. With the current loop structure, the obshashrange use works as well as before as it passes lists with a single node. Adjust the interface by allowing revision lists as well as node lists. This helps cases that computes ancestors as it reduces the materialisation cost. Use this in _pushdiscoveryobsmarker and _getbundleobsmarkerpart. Improve the latter further by directly using ancestors(). Performance benchmarks show notable and welcome improvement to no-op push and pull (that would also apply to other push/pull). This apply to push and pull done without evolve. ### push/pull Benchmark parameter # bin-env-vars.hg.flavor = default # benchmark.variants.explicit-rev = none # benchmark.variants.protocol = ssh # benchmark.variants.revs = none ## benchmark.name = hg.command.pull # data-env-vars.name = mercurial-devel-2024-03-22-zstd-sparse-revlog before: 5.968537 seconds after: 5.668507 seconds (-5.03%, -0.30) # data-env-vars.name = tryton-devel-2024-03-22-zstd-sparse-revlog before: 1.446232 seconds after: 0.835553 seconds (-42.23%, -0.61) # data-env-vars.name = netbsd-src-draft-2024-09-19-zstd-sparse-revlog before: 5.777412 seconds after: 2.523454 seconds (-56.32%, -3.25) ## benchmark.name = hg.command.push # data-env-vars.name = mercurial-devel-2024-03-22-zstd-sparse-revlog before: 6.155501 seconds after: 5.885072 seconds (-4.39%, -0.27) # data-env-vars.name = tryton-devel-2024-03-22-zstd-sparse-revlog before: 1.491054 seconds after: 0.934882 seconds (-37.30%, -0.56) # data-env-vars.name = netbsd-src-draft-2024-09-19-zstd-sparse-revlog before: 5.902494 seconds after: 
2.957644 seconds (-49.89%, -2.94) There is no notable difference in these results when using the "rust" flavor instead of the "default". The performance impact on the same operations when using evolve was also tested and no impact was noted.

Matt Harbison - - Load All Authors

File last commit:

r52756:f4733654 default


                r52789:8583d138

default

Download file

             v2.py
        
                    429 lines
            
             | 15.0 KiB
            
                | text/x-python
            
             |
                PythonLexer
            
             / mercurial / dirstateutils / v2.py
          
                    History
                
                 |
                  Annotation
                 | Raw
                 |Copy content
                 |Copy permalink

      # v2.py - Pure-Python implementation of the dirstate-v2 file format

      #

      # Copyright Mercurial Contributors

      #

      # This software may be used and distributed according to the terms of the

      # GNU General Public License version 2 or any later version.

      from __future__ import annotations

      import struct

      import typing

      from ..thirdparty import attr

      # Force pytype to use the non-vendored package

      if typing.TYPE_CHECKING:

          # noinspection PyPackageRequirements

          import attr

      from .. import error, policy

      parsers = policy.importmod('parsers')

      # Must match the constant of the same name in

      # `rust/hg-core/src/dirstate_tree/on_disk.rs`

      TREE_METADATA_SIZE = 44

      NODE_SIZE = 44

      # Must match the `TreeMetadata` Rust struct in

      # `rust/hg-core/src/dirstate_tree/on_disk.rs`. See doc-comments there.

      #

      # * 4 bytes: start offset of root nodes

      # * 4 bytes: number of root nodes

      # * 4 bytes: total number of nodes in the tree that have an entry

      # * 4 bytes: total number of nodes in the tree that have a copy source

      # * 4 bytes: number of bytes in the data file that are not used anymore

      # * 4 bytes: unused

      # * 20 bytes: SHA-1 hash of ignore patterns

      TREE_METADATA = struct.Struct('>LLLLL4s20s')

      # Must match the `Node` Rust struct in

      # `rust/hg-core/src/dirstate_tree/on_disk.rs`. See doc-comments there.

      #

      # * 4 bytes: start offset of full path

      # * 2 bytes: length of the full path

      # * 2 bytes: length within the full path before its "base name"

      # * 4 bytes: start offset of the copy source if any, or zero for no copy source

      # * 2 bytes: length of the copy source if any, or unused

      # * 4 bytes: start offset of child nodes

      # * 4 bytes: number of child nodes

      # * 4 bytes: number of descendant nodes that have an entry

      # * 4 bytes: number of descendant nodes that have a "tracked" state

      # * 2 bytes: flags

      # * 4 bytes: expected size

      # * 4 bytes: mtime seconds

      # * 4 bytes: mtime nanoseconds

      NODE = struct.Struct('>LHHLHLLLLHlll')

      assert TREE_METADATA_SIZE == TREE_METADATA.size

      assert NODE_SIZE == NODE.size

      # match constant in mercurial/pure/parsers.py

      DIRSTATE_V2_DIRECTORY = 1 << 13

      def parse_dirstate(map, copy_map, data, tree_metadata):
          """Fill `map` and `copy_map` from a complete dirstate-v2 payload.

          - map: {path: entry} mapping, populated in place
          - copy_map: {path: copy-source} mapping, populated in place
          - data: binary blob holding the v2 node data
          - tree_metadata: binary blob describing the top-level node (from the
            docket), laid out as `TREE_METADATA`
          """
          # Only the first two fields (offset and number of the root nodes) are
          # needed for parsing; the counters, the unused bytes and the hash of
          # the ignore patterns are unpacked but not looked at.
          header = TREE_METADATA.unpack(tree_metadata)
          root_nodes_start, root_nodes_len = header[0], header[1]
          parse_nodes(map, copy_map, data, root_nodes_start, root_nodes_len)

      def parse_nodes(map, copy_map, data, start, len):
          """Read `len` consecutive nodes of `data`, the first one at `start`.

          Helper of parse_dirstate: it recurses into the children of every node
          and fills `map` and `copy_map` in place.

          Directory specific information (DIRECTORY, ALL_UNKNOWN_RECORDED,
          ALL_IGNORED_RECORDED) needs no processing and is ignored.
          """
          # Nodes are fixed-size records stored back to back
          stop = start + NODE_SIZE * len
          for node_start in range(start, stop, NODE_SIZE):
              fields = NODE.unpack(slice_with_len(data, node_start, NODE_SIZE))
              # fields[2] (basename start) and fields[7:9] (descendant counters)
              # are not needed to rebuild the flat mappings
              path_start, path_len = fields[0], fields[1]
              copy_source_start, copy_source_len = fields[3], fields[4]
              children_start, children_count = fields[5], fields[6]
              flags, size, mtime_s, mtime_ns = fields[9:]

              # Children are handled before the node itself
              parse_nodes(map, copy_map, data, children_start, children_count)

              item = parsers.DirstateItem.from_v2_data(
                  flags, size, mtime_s, mtime_ns
              )
              if item.any_tracked:
                  path = slice_with_len(data, path_start, path_len)
                  map[path] = item
                  # A zero offset means "no copy source"
                  if copy_source_start:
                      copy_map[path] = slice_with_len(
                          data, copy_source_start, copy_source_len
                      )

      def slice_with_len(data, start, len):
          """Return the slice of `data` beginning at `start`, `len` items long.

          Regular slicing rules apply, so the result is shorter when `data` ends
          before `start + len`.
          """
          end = start + len
          return data[start:end]

      @attr.s
      class Node:
          """One node of the dirstate-v2 tree while it is being written out."""

          # Full path of the node; the root node is created with b""
          path = attr.ib()
          # Dirstate entry of the node, or None for a node that is only a folder
          entry = attr.ib()
          # Parent `Node`, None when not given
          parent = attr.ib(default=None)
          # Number of direct children
          children_count = attr.ib(default=0)
          # Offset in the data at which the packed children start
          children_offset = attr.ib(default=0)
          # Counters stored in the packed node
          descendants_with_entry = attr.ib(default=0)
          tracked_descendants = attr.ib(default=0)

          def pack(self, copy_map, paths_offset):
              """Return this node packed with the `NODE` struct.

              `paths_offset` is recorded as the start offset of the node's path.
              When `copy_map` holds a copy source for the path, that copy source
              is recorded as starting right after the path.
              """
              path = self.path
              copy = copy_map.get(path)
              path_len = len(path)

              has_copy = copy is not None
              copy_source_start = paths_offset + path_len if has_copy else 0
              copy_source_len = len(copy) if has_copy else 0

              if self.entry is None:
                  # There are no mtime-cached directories in the Python
                  # implementation
                  flags, size, mtime_s, mtime_ns = DIRSTATE_V2_DIRECTORY, 0, 0, 0
              else:
                  flags, size, mtime_s, mtime_ns = self.entry.v2_data()

              # rfind returns -1 without a b'/', giving a base name start of 0
              basename_start = path.rfind(b'/') + 1

              return NODE.pack(
                  paths_offset,
                  path_len,
                  basename_start,
                  copy_source_start,
                  copy_source_len,
                  self.children_offset,
                  self.children_count,
                  self.descendants_with_entry,
                  self.tracked_descendants,
                  flags,
                  size,
                  mtime_s,
                  mtime_ns,
              )

      def pack_dirstate(map, copy_map):
          """
          Serialize `map` and `copy_map` into the dirstate v2 binary format.

          Returns `(data, tree_metadata)`: `data` is a bytearray holding the paths
          and the packed nodes, `tree_metadata` is the packed `TREE_METADATA`
          record meant for the docket.

          The on-disk format is a tree in which the leaves are written first
          (sorted within each directory), then their parents, up to the root node
          whose description goes to the docket. The format is described in
          `mercurial/helptext/internals/dirstate-v2`.

          `map` and `copy_map` are flat dicts, so the hierarchy has to be
          recovered. This is done without holding the whole tree in memory: only
          the nodes still needed to emit the format are kept around.

          # How it works

          The counters for tracked descendants and the storage of copies are left
          out of this description; they are simple bookkeeping on top of the
          traversal.

          ## Growing a subtree

          `map` is sorted first, so that the files of a directory are contiguous
          (a/b/c and a/b/d are neighbours) and no branch of the tree is visited
          twice. A "cursor" node tracks the folder being filled; every node knows
          its parent and the values the format needs (see the `Node` class), so
          the relevant part of the tree is built lazily.

          For each file, the cursor is moved to the folder of that file. If the
          very first file is "a/b/c", starting from `Node[""]` this creates
          `Node["a"]` (parent `Node[""]`), then `Node["a/b"]` (parent
          `Node["a"]`). All these nodes are pushed on a stack.

          While the following file is in the same subtree ("a/b/d" or "a/b/e/f"),
          it is pushed as well, creating folder nodes as needed. As soon as the
          following file is outside of it ("a/other" while in "a/b"), the current
          subtree is known to be complete.

          ## Emitting a subtree

          A complete subtree sits on the stack and is written folder by folder:
          pop the stack into a list until the folder changes, reverse that list
          (the format wants children sorted), write it, and repeat upwards until
          the "other" subtree is reached.

          An example:

              a
              dir1/b
              dir1/c
              dir2/dir3/d
              dir2/dir3/e
              dir2/f

          Would have us:

              - add to the stack until "dir2/dir3/e"
              - realize that "dir2/f" is in a different subtree
              - pop "dir2/dir3/e", "dir2/dir3/d", reverse them so they're sorted
                and pack them since the next entry is "dir2/dir3"
              - go back up to "dir2"
              - add "dir2/f" to the stack
              - realize we're done with the map
              - pop "dir2/f", "dir2/dir3" from the stack, reverse and pack them
              - go up to the root node, do the same to write "a", "dir1" and
                "dir2" in that order

          ## The root node

          The root node itself is not serialized; its information is written to
          the docket instead (again, see
          `mercurial/helptext/internals/dirstate-v2`).
          """
          data = bytearray()
          # Will always be 0 since this implementation always re-writes everything
          # to disk
          unreachable_bytes = 0
          unused = b'\x00' * 4
          # This is an optimization that's only useful for the Rust implementation
          ignore_patterns_hash = b'\x00' * 20

          if len(map) == 0:
              # Nothing to write: root offset, root count and both counters are 0
              empty_metadata = TREE_METADATA.pack(
                  0,
                  0,
                  0,
                  0,
                  unreachable_bytes,
                  unused,
                  ignore_patterns_hash,
              )
              return data, empty_metadata

          nodes_with_entry_count = 0
          nodes_with_copy_source_count = 0

          sorted_map = sorted(map.items(), key=lambda item: item[0].split(b"/"))
          entry_total = len(sorted_map)

          # Use a stack to have to only remember the nodes we currently need
          # instead of building the entire tree in memory
          current_node = Node(b"", None)
          stack = [current_node]

          # `next_index` is the position in `sorted_map` of the entry that follows
          # the one being processed
          for next_index, (path, entry) in enumerate(sorted_map, 1):
              nodes_with_entry_count += 1
              if path in copy_map:
                  nodes_with_copy_source_count += 1

              current_folder = get_folder(path)
              current_node = move_to_correct_node_in_tree(
                  current_folder, current_node, stack
              )

              current_node.children_count += 1
              # Entries from `map` are never `None`
              if entry.tracked:
                  current_node.tracked_descendants += 1
              current_node.descendants_with_entry += 1
              stack.append(Node(path, entry, current_node))

              if next_index < entry_total:
                  next_path = sorted_map[next_index][0]
                  if is_ancestor(next_path, current_folder):
                      # The next entry is in the same sub-tree: don't pack yet
                      continue
              else:
                  next_path = None

              pack_directory_children(current_node, copy_map, data, stack)
              while stack and current_node.path != b"":
                  # Go up the tree and write until we reach the folder of the next
                  # entry (if any, otherwise the root)
                  parent = current_node.parent
                  if next_path is not None and is_ancestor(
                      next_path, get_folder(stack[-1].path)
                  ):
                      break
                  if parent is None:
                      break
                  pack_directory_children(parent, copy_map, data, stack)
                  current_node = parent

          # Special case for the root node since we don't write it to disk, only
          # its children to the docket
          root_node = stack.pop()
          assert root_node.path == b"", root_node.path
          assert len(stack) == 0, len(stack)

          tree_metadata = TREE_METADATA.pack(
              root_node.children_offset,
              root_node.children_count,
              nodes_with_entry_count,
              nodes_with_copy_source_count,
              unreachable_bytes,
              unused,
              ignore_patterns_hash,
          )
          return data, tree_metadata

      def get_folder(path):
          """
          Return everything before the last b'/' of `path`, or an empty string
          when `path` has no b'/' (a root path).
          """
          if b'/' not in path:
              return b''
          return path[: path.rindex(b'/')]

      def is_ancestor(path, maybe_ancestor):
          """Tell whether `maybe_ancestor` is an ancestor folder of `path`.

          An empty `maybe_ancestor` (the root) always matches. Otherwise the
          comparison is done per path component, and a path is not an ancestor
          of itself.

          >>> is_ancestor(b"a", b"")
          True
          >>> is_ancestor(b"a/b/c", b"a/b/c")
          False
          >>> is_ancestor(b"hgext3rd/__init__.py", b"hgext")
          False
          >>> is_ancestor(b"hgext3rd/__init__.py", b"hgext3rd")
          True
          """
          if maybe_ancestor == b"":
              return True
          # Rules out equal paths as well as paths shorter than the candidate
          if path <= maybe_ancestor:
              return False
          component_pairs = zip(path.split(b"/"), maybe_ancestor.split(b"/"))
          for component, expected in component_pairs:
              if not component == expected:
                  return False
          return True

      def move_to_correct_node_in_tree(target_folder, current_node, stack):
          """
          Walk from `current_node` to the node whose path is `target_folder` and
          return it. Folder nodes missing on the way down are created and pushed
          on `stack`.
          """
          node = current_node
          while target_folder != node.path:
              if not is_ancestor(target_folder, node.path):
                  # We need to go up a folder
                  node = node.parent
                  continue

              # We need to go down a folder
              top = stack[-1]
              if top.path == target_folder:
                  # This folder is now a file and only contains removed entries
                  # merge with the last node
                  node = top
                  continue

              # Name of the next path component below `node`
              remainder = target_folder[len(node.path) :].lstrip(b'/')
              subfolder_name = remainder.split(b'/', 1)[0]
              if node.path:
                  subfolder_path = node.path + b'/' + subfolder_name
              else:
                  subfolder_path = subfolder_name

              node.children_count += 1
              node = Node(subfolder_path, None, node)
              stack.append(node)
          return node

      def pack_directory_children(node, copy_map, data, stack):
          """
          Pop the direct children of `node` from `stack` and append their binary
          representation, in sorted order, to `data`.

          The descendant counters of the children are added to those of `node`
          and `node.children_offset` is set to where the packed children start.
          Raises `error.ProgrammingError` when the top of `stack` is not a direct
          child of `node`.
          """
          direct_children = []
          while True:
              top = stack[-1]
              # Stop at the root node or at the first node of another folder
              if top.path == b"" or get_folder(top.path) != node.path:
                  break
              direct_children.append(stack.pop())
          if not direct_children:
              raise error.ProgrammingError(b"no direct children for %r" % node.path)

          packed_children = bytearray()
          # The stack yields children last-first; walk them backwards to get the
          # sorted order. Paths go to `data` right away, the fixed-size nodes are
          # buffered.
          for child in reversed(direct_children):
              packed_children.extend(
                  child.pack(copy_map=copy_map, paths_offset=len(data))
              )
              data.extend(child.path)
              data.extend(copy_map.get(child.path, b""))
              node.tracked_descendants += child.tracked_descendants
              node.descendants_with_entry += child.descendants_with_entry

          # Write the fixed-size child nodes all together
          node.children_offset = len(data)
          data.extend(packed_children)

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

				# v2.py - Pure-Python implementation of the dirstate-v2 file format
				#
				# Copyright Mercurial Contributors
				#
				# This software may be used and distributed according to the terms of the
				# GNU General Public License version 2 or any later version.

				from __future__ import annotations

				import struct
				import typing

				from ..thirdparty import attr

				# Force pytype to use the non-vendored package
				if typing.TYPE_CHECKING:
				# noinspection PyPackageRequirements
				import attr

				from .. import error, policy

				parsers = policy.importmod('parsers')


				# Must match the constant of the same name in
				# `rust/hg-core/src/dirstate_tree/on_disk.rs`
				TREE_METADATA_SIZE = 44
				NODE_SIZE = 44


				# Must match the `TreeMetadata` Rust struct in
				# `rust/hg-core/src/dirstate_tree/on_disk.rs`. See doc-comments there.
				#
				# * 4 bytes: start offset of root nodes
				# * 4 bytes: number of root nodes
				# * 4 bytes: total number of nodes in the tree that have an entry
				# * 4 bytes: total number of nodes in the tree that have a copy source
				# * 4 bytes: number of bytes in the data file that are not used anymore
				# * 4 bytes: unused
				# * 20 bytes: SHA-1 hash of ignore patterns
				TREE_METADATA = struct.Struct('>LLLLL4s20s')


				# Must match the `Node` Rust struct in
				# `rust/hg-core/src/dirstate_tree/on_disk.rs`. See doc-comments there.
				#
				# * 4 bytes: start offset of full path
				# * 2 bytes: length of the full path
				# * 2 bytes: length within the full path before its "base name"
				# * 4 bytes: start offset of the copy source if any, or zero for no copy source
				# * 2 bytes: length of the copy source if any, or unused
				# * 4 bytes: start offset of child nodes
				# * 4 bytes: number of child nodes
				# * 4 bytes: number of descendant nodes that have an entry
				# * 4 bytes: number of descendant nodes that have a "tracked" state
				# * 2 bytes: flags
				# * 4 bytes: expected size
				# * 4 bytes: mtime seconds
				# * 4 bytes: mtime nanoseconds
				NODE = struct.Struct('>LHHLHLLLLHlll')


				assert TREE_METADATA_SIZE == TREE_METADATA.size
				assert NODE_SIZE == NODE.size

				# match constant in mercurial/pure/parsers.py
				DIRSTATE_V2_DIRECTORY = 1 << 13


				def parse_dirstate(map, copy_map, data, tree_metadata):
				    """parse a full v2-dirstate from binary data into dictionaries:

				    - map: a {path: entry} mapping that will be filled
				    - copy_map: a {path: copy-source} mapping that will be filled
				    - data: a binary blob containing the v2 nodes data
				    - tree_metadata: a binary blob of the top level node (from the docket)
				    """
				    # Only the location and the number of the root nodes are needed to
				    # parse; the other fields are unpacked and ignored.
				    (
				        root_nodes_start,
				        root_nodes_len,
				        _nodes_with_entry_count,
				        _nodes_with_copy_source_count,
				        _unreachable_bytes,
				        _unused,
				        _ignore_patterns_hash,
				    ) = TREE_METADATA.unpack(tree_metadata)
				    parse_nodes(map, copy_map, data, root_nodes_start, root_nodes_len)


				def parse_nodes(map, copy_map, data, start, len):
				    """parse <len> nodes from <data> starting at offset <start>

				    This is used by parse_dirstate to recursively fill `map` and `copy_map`.

				    All directory specific information is ignored and does not need any
				    processing (DIRECTORY, ALL_UNKNOWN_RECORDED, ALL_IGNORED_RECORDED)
				    """
				    for i in range(len):
				        # Nodes are fixed-size records stored back to back
				        node_start = start + NODE_SIZE * i
				        node_bytes = slice_with_len(data, node_start, NODE_SIZE)
				        (
				            path_start,
				            path_len,
				            _basename_start,
				            copy_source_start,
				            copy_source_len,
				            children_start,
				            children_count,
				            _descendants_with_entry_count,
				            _tracked_descendants_count,
				            flags,
				            size,
				            mtime_s,
				            mtime_ns,
				        ) = NODE.unpack(node_bytes)

				        # Parse child nodes of this node recursively
				        parse_nodes(map, copy_map, data, children_start, children_count)

				        item = parsers.DirstateItem.from_v2_data(flags, size, mtime_s, mtime_ns)
				        if not item.any_tracked:
				            continue
				        path = slice_with_len(data, path_start, path_len)
				        map[path] = item
				        # A zero offset means "no copy source"
				        if copy_source_start:
				            copy_map[path] = slice_with_len(
				                data, copy_source_start, copy_source_len
				            )


				def slice_with_len(data, start, len):
				    """Return the slice of `data` starting at `start` and `len` items long.

				    Regular slicing rules apply: the result is shorter when `data` ends
				    before `start + len`.
				    """
				    return data[start : start + len]


				@attr.s
				class Node:
				    """One node of the dirstate-v2 tree while it is being written out."""

				    # Full path of the node; the root node is created with b""
				    path = attr.ib()
				    # Dirstate entry of the node, or None for a node that is only a folder
				    entry = attr.ib()
				    # Parent `Node`, None when not given
				    parent = attr.ib(default=None)
				    # Number of direct children
				    children_count = attr.ib(default=0)
				    # Offset in the data at which the packed children start
				    children_offset = attr.ib(default=0)
				    # Counters stored in the packed node
				    descendants_with_entry = attr.ib(default=0)
				    tracked_descendants = attr.ib(default=0)

				    def pack(self, copy_map, paths_offset):
				        """Return this node packed with the `NODE` struct.

				        `paths_offset` is recorded as the start offset of the node's path.
				        When `copy_map` holds a copy source for the path, that copy source
				        is recorded as starting right after the path.
				        """
				        path = self.path
				        copy = copy_map.get(path)
				        entry = self.entry

				        path_start = paths_offset
				        path_len = len(path)
				        basename_start = path.rfind(b'/') + 1  # 0 if rfind returns -1
				        if copy is not None:
				            copy_source_start = paths_offset + len(path)
				            copy_source_len = len(copy)
				        else:
				            copy_source_start = 0
				            copy_source_len = 0
				        if entry is not None:
				            flags, size, mtime_s, mtime_ns = entry.v2_data()
				        else:
				            # There are no mtime-cached directories in the Python implementation
				            flags = DIRSTATE_V2_DIRECTORY
				            size = 0
				            mtime_s = 0
				            mtime_ns = 0
				        return NODE.pack(
				            path_start,
				            path_len,
				            basename_start,
				            copy_source_start,
				            copy_source_len,
				            self.children_offset,
				            self.children_count,
				            self.descendants_with_entry,
				            self.tracked_descendants,
				            flags,
				            size,
				            mtime_s,
				            mtime_ns,
				        )


				def pack_dirstate(map, copy_map):
				    """
				    Pack `map` and `copy_map` into the dirstate v2 binary format and return
				    the tuple `(data, metadata)`: `data` is a bytearray, `metadata` is the
				    packed `TREE_METADATA` record.

				    The on-disk format expects a tree-like structure where the leaves are
				    written first (and sorted per-directory), going up levels until the root
				    node and writing that one to the docket. See more details on the on-disk
				    format in `mercurial/helptext/internals/dirstate-v2`.

				    Since both `map` and `copy_map` are flat dicts we need to figure out the
				    hierarchy. This algorithm does so without having to build the entire tree
				    in-memory: it only keeps the minimum number of nodes around to satisfy the
				    format.

				    # Algorithm explanation

				    This explanation does not talk about the different counters for tracked
				    descendants and storing the copies, but that work is pretty simple once this
				    algorithm is in place.

				    ## Building a subtree

				    First, sort `map`: this makes it so the leaves of the tree are contiguous
				    per directory (i.e. a/b/c and a/b/d will be next to each other in the list),
				    and enables us to use the ordering of folders to have a "cursor" of the
				    current folder we're in without ever going twice in the same branch of the
				    tree. The cursor is a node that remembers its parent and any information
				    relevant to the format (see the `Node` class), building the relevant part
				    of the tree lazily.

				    Then, for each file in `map`, move the cursor into the tree to the
				    corresponding folder of the file: for example, if the very first file
				    is "a/b/c", we start from `Node[""]`, create `Node["a"]` which points to
				    its parent `Node[""]`, then create `Node["a/b"]`, which points to its parent
				    `Node["a"]`. These nodes are kept around in a stack.

				    If the next file in `map` is in the same subtree ("a/b/d" or "a/b/e/f"), we
				    add it to the stack and keep looping with the same logic of creating the
				    tree nodes as needed. If however the next file in `map` is *not* in the same
				    subtree ("a/other", if we're still in the "a/b" folder), then we know that
				    the subtree we're in is complete.

				    ## Writing the subtree

				    We have the entire subtree in the stack, so we start writing it to disk
				    folder by folder. The way we write a folder is to pop the stack into a list
				    until the folder changes, reverse this list of direct children (to satisfy
				    the format requirement that children be sorted). This process repeats until
				    we hit the "other" subtree.

				    An example:

				        a
				        dir1/b
				        dir1/c
				        dir2/dir3/d
				        dir2/dir3/e
				        dir2/f

				    Would have us:

				        - add to the stack until "dir2/dir3/e"
				        - realize that "dir2/f" is in a different subtree
				        - pop "dir2/dir3/e", "dir2/dir3/d", reverse them so they're sorted and
				          pack them since the next entry is "dir2/dir3"
				        - go back up to "dir2"
				        - add "dir2/f" to the stack
				        - realize we're done with the map
				        - pop "dir2/f", "dir2/dir3" from the stack, reverse and pack them
				        - go up to the root node, do the same to write "a", "dir1" and "dir2" in
				          that order

				    ## Special case for the root node

				    The root node is not serialized in the format, but its information is
				    written to the docket. Again, see more details on the on-disk format in
				    `mercurial/helptext/internals/dirstate-v2`.
				    """
				    data = bytearray()
				    root_nodes_start = 0
				    root_nodes_len = 0
				    nodes_with_entry_count = 0
				    nodes_with_copy_source_count = 0
				    # Will always be 0 since this implementation always re-writes everything
				    # to disk
				    unreachable_bytes = 0
				    unused = b'\x00' * 4
				    # This is an optimization that's only useful for the Rust implementation
				    ignore_patterns_hash = b'\x00' * 20

				    if len(map) == 0:
				        # Nothing to write: every offset and counter is still zero
				        tree_metadata = TREE_METADATA.pack(
				            root_nodes_start,
				            root_nodes_len,
				            nodes_with_entry_count,
				            nodes_with_copy_source_count,
				            unreachable_bytes,
				            unused,
				            ignore_patterns_hash,
				        )
				        return data, tree_metadata

				    sorted_map = sorted(map.items(), key=lambda x: x[0].split(b"/"))

				    # Use a stack to have to only remember the nodes we currently need
				    # instead of building the entire tree in memory
				    stack = []
				    current_node = Node(b"", None)
				    stack.append(current_node)

				    # `index` starts at 1, so it is the position of the *next* entry
				    for index, (path, entry) in enumerate(sorted_map, 1):
				        nodes_with_entry_count += 1
				        if path in copy_map:
				            nodes_with_copy_source_count += 1
				        current_folder = get_folder(path)
				        current_node = move_to_correct_node_in_tree(
				            current_folder, current_node, stack
				        )

				        current_node.children_count += 1
				        # Entries from `map` are never `None`
				        if entry.tracked:
				            current_node.tracked_descendants += 1
				        current_node.descendants_with_entry += 1
				        stack.append(Node(path, entry, current_node))

				        should_pack = True
				        next_path = None
				        if index < len(sorted_map):
				            # Determine if the next entry is in the same sub-tree, if so don't
				            # pack yet
				            next_path = sorted_map[index][0]
				            should_pack = not is_ancestor(next_path, current_folder)
				        if should_pack:
				            pack_directory_children(current_node, copy_map, data, stack)
				            while stack and current_node.path != b"":
				                # Go up the tree and write until we reach the folder of the next
				                # entry (if any, otherwise the root)
				                parent = current_node.parent
				                in_ancestor_of_next_path = next_path is not None and (
				                    is_ancestor(next_path, get_folder(stack[-1].path))
				                )
				                if parent is None or in_ancestor_of_next_path:
				                    break
				                pack_directory_children(parent, copy_map, data, stack)
				                current_node = parent

				    # Special case for the root node since we don't write it to disk, only its
				    # children to the docket
				    current_node = stack.pop()
				    assert current_node.path == b"", current_node.path
				    assert len(stack) == 0, len(stack)

				    tree_metadata = TREE_METADATA.pack(
				        current_node.children_offset,
				        current_node.children_count,
				        nodes_with_entry_count,
				        nodes_with_copy_source_count,
				        unreachable_bytes,
				        unused,
				        ignore_patterns_hash,
				    )

				    return data, tree_metadata


				def get_folder(path):
				    """
				    Return the folder of the path that's given, an empty string for root paths.

				    The folder is everything before the last b'/' of `path`.
				    """
				    return path.rsplit(b'/', 1)[0] if b'/' in path else b''


				def is_ancestor(path, maybe_ancestor):
				    """Returns whether `maybe_ancestor` is an ancestor of `path`.

				    An empty `maybe_ancestor` (the root) always matches. Otherwise the
				    comparison is done per path component, and a path is not an ancestor
				    of itself.

				    >>> is_ancestor(b"a", b"")
				    True
				    >>> is_ancestor(b"a/b/c", b"a/b/c")
				    False
				    >>> is_ancestor(b"hgext3rd/__init__.py", b"hgext")
				    False
				    >>> is_ancestor(b"hgext3rd/__init__.py", b"hgext3rd")
				    True
				    """
				    if maybe_ancestor == b"":
				        return True
				    # Rules out equal paths as well as paths shorter than the candidate
				    if path <= maybe_ancestor:
				        return False
				    path_components = path.split(b"/")
				    ancestor_components = maybe_ancestor.split(b"/")
				    return all(c == o for c, o in zip(path_components, ancestor_components))


				def move_to_correct_node_in_tree(target_folder, current_node, stack):
				    """
				    Move inside the dirstate node tree to the node corresponding to
				    `target_folder`, creating the missing nodes along the way if needed.

				    Newly created folder nodes are pushed on `stack`. Returns the node whose
				    path is `target_folder`.
				    """
				    while target_folder != current_node.path:
				        if is_ancestor(target_folder, current_node.path):
				            # We need to go down a folder
				            prefix = target_folder[len(current_node.path) :].lstrip(b'/')
				            subfolder_name = prefix.split(b'/', 1)[0]
				            if current_node.path:
				                subfolder_path = current_node.path + b'/' + subfolder_name
				            else:
				                subfolder_path = subfolder_name
				            next_node = stack[-1]
				            if next_node.path == target_folder:
				                # This folder is now a file and only contains removed entries
				                # merge with the last node
				                current_node = next_node
				            else:
				                current_node.children_count += 1
				                current_node = Node(subfolder_path, None, current_node)
				                stack.append(current_node)
				        else:
				            # We need to go up a folder
				            current_node = current_node.parent
				    return current_node


				def pack_directory_children(node, copy_map, data, stack):
				    """
				    Write the binary representation of the direct sorted children of `node` to
				    `data`

				    The children are popped from `stack`, their descendant counters are added
				    to those of `node`, and `node.children_offset` is set to where the packed
				    children start. Raises `error.ProgrammingError` when the top of `stack`
				    is not a direct child of `node`.
				    """
				    direct_children = []

				    # Stop at the root node or at the first node of another folder
				    while stack[-1].path != b"" and get_folder(stack[-1].path) == node.path:
				        direct_children.append(stack.pop())
				    if not direct_children:
				        raise error.ProgrammingError(b"no direct children for %r" % node.path)

				    # Reverse the stack to get the correct sorted order
				    direct_children.reverse()
				    packed_children = bytearray()
				    # Write the paths to `data`. Pack child nodes but don't write them yet
				    for child in direct_children:
				        packed = child.pack(copy_map=copy_map, paths_offset=len(data))
				        packed_children.extend(packed)
				        data.extend(child.path)
				        data.extend(copy_map.get(child.path, b""))
				        node.tracked_descendants += child.tracked_descendants
				        node.descendants_with_entry += child.descendants_with_entry
				    # Write the fixed-size child nodes all together
				    node.children_offset = len(data)
				    data.extend(packed_children)