upstream/mercurial-mirror Files · mercurial/cffi/osutil.py

sparse-read: move from a recursive-based approach to a heap-based one...

sparse-read: move from a recursive-based approach to a heap-based one. The previous recursive approach tried to optimise each read slice to have a good density. It tended to over-optimise smaller slices while leaving larger holes in others. The new approach focuses on improving the combined density of all the reads instead of that of the individual slices. It slices at the largest gaps first, as they reduce the total amount of read data most efficiently. Another benefit of this approach is that we iterate over the delta chain only once, reducing the overhead of slicing long delta chains. On the repository we use for tests, the new approach shows similar or faster performance than the current default linear full read. The repository contains about 450,000 revisions with many concurrent topological branches. Tests have been run on two versions of the repository: one built with the current delta constraint, and the other with an unlimited delta span (using 'experimental.maxdeltachainspan=0'). Below are timings for building 1% of all the revisions in the manifest log using 'hg perfrevlogrevisions -m'. Times are given in seconds. They include the couple of follow-up changesets in this series. Timings by delta-span (standard / unlimited): linear-read 922s / 632s; sparse-read 814s / 566s.

Jun Wu - - Load All Authors

File last commit:

r34648:dacfcdd8 default


                r34881:9e18ab7f

default

Download file

             osutil.py
        
                    102 lines
            
             | 3.5 KiB
            
                | text/x-python
            
             |
                PythonLexer
            
             / mercurial / cffi / osutil.py
          
                    History
                
                 |
                  Annotation
                 | Raw
                 |Copy content
                 |Copy permalink

      # osutil.py - CFFI version of osutil.c

      #

      # Copyright 2016 Maciej Fijalkowski <fijall@gmail.com>

      #

      # This software may be used and distributed according to the terms of the

      # GNU General Public License version 2 or any later version.

      from __future__ import absolute_import

      import os

      import stat as statmod

      from ..pure.osutil import *

      from .. import (

          pycompat,

      )

      if pycompat.isdarwin:

          from . import _osutil

          # Shorthand for the two objects exposed by the _osutil module
          # imported just above; every C call below goes through them.
          ffi = _osutil.ffi

          lib = _osutil.lib

          listdir_batch_size = 4096

          # Tweakable: size in bytes of the buffer handed to getattrlistbulk

          # on each call; it only affects performance.

          attrkinds = [None] * 20 # we need the max no for enum VXXX, 20 is plenty

          # Lookup table indexed by a lib.VXXX value (read from a record's
          # obj_type in listdirinternal) giving the matching stat.S_IF*
          # constant. Slots without an assignment below stay None.
          attrkinds[lib.VREG] = statmod.S_IFREG

          attrkinds[lib.VDIR] = statmod.S_IFDIR

          attrkinds[lib.VLNK] = statmod.S_IFLNK

          attrkinds[lib.VBLK] = statmod.S_IFBLK

          attrkinds[lib.VCHR] = statmod.S_IFCHR

          attrkinds[lib.VFIFO] = statmod.S_IFIFO

          attrkinds[lib.VSOCK] = statmod.S_IFSOCK

          class stat_res(object):
              """Small record holding the stat fields collected by listdir.

              It carries exactly three attributes, named like the matching
              fields of an os.stat() result: st_mode, st_mtime and st_size.
              """

              def __init__(self, st_mode, st_mtime, st_size):
                  # Plain attribute storage; no validation or conversion.
                  self.st_mode, self.st_mtime, self.st_size = (
                      st_mode, st_mtime, st_size)

          # Byte offset of tv_sec inside struct timespec.
          # NOTE(review): not referenced anywhere else in the visible code.
          tv_sec_ofs = ffi.offsetof("struct timespec", "tv_sec")

          # Single module-level buffer of listdir_batch_size bytes, allocated
          # once and reused by every getattrlistbulk call in listdirinternal.
          buf = ffi.new("char[]", listdir_batch_size)

          def listdirinternal(dfd, req, stat, skip):
              """Read every entry of the open directory ``dfd``.

              Calls getattrlistbulk() repeatedly, filling the module-level
              ``buf``, until it reports zero entries, and decodes each record
              that comes back.

              dfd  -- directory file descriptor passed to getattrlistbulk()
              req  -- attribute request (``struct attrlist*``) passed through
                      to getattrlistbulk()
              stat -- when true, entries are ``(name, kind, stat_res)``
                      tuples; otherwise they are ``(name, kind)`` tuples
              skip -- if an entry with this name is a directory, the listing
                      is abandoned and ``[]`` is returned

              ``kind`` is the ``stat.S_IF*`` constant found in ``attrkinds``
              for the record's obj_type.

              Raises OSError when getattrlistbulk() returns -1.
              """
              ret = []
              # Loop-invariant: offset of the name reference inside a record.
              base_ofs = ffi.offsetof('val_attrs_t', 'name_info')
              while True:
                  r = lib.getattrlistbulk(dfd, req, buf, listdir_batch_size, 0)
                  if r == 0:
                      break
                  if r == -1:
                      # Read errno once so code and message always agree.
                      err = ffi.errno
                      raise OSError(err, os.strerror(err))
                  cur = ffi.cast("val_attrs_t*", buf)
                  for _ in range(r):
                      entry = cur
                      lgt = entry.length
                      assert lgt == ffi.cast('uint32_t*', entry)[0]
                      # Step to the next record immediately.  Doing it at the
                      # end of the body meant the "continue" below left cur
                      # on the same record for the following iteration.
                      cur = ffi.cast("val_attrs_t*",
                                     int(ffi.cast("intptr_t", entry)) + lgt)
                      ofs = entry.name_info.attr_dataoffset
                      str_lgt = entry.name_info.attr_length
                      # attr_dataoffset is relative to the name_info field;
                      # the last byte of the name is dropped (presumably the
                      # terminating NUL, see getattrlist(2)).
                      name = str(ffi.buffer(
                          ffi.cast("char*", entry) + base_ofs + ofs,
                          str_lgt - 1))
                      tp = attrkinds[entry.obj_type]
                      if name == "." or name == "..":
                          continue
                      # tp is an S_IF* constant, so compare with S_IFDIR.
                      # S_ISDIR is a function and never equals tp, which made
                      # this short-circuit unreachable.
                      if skip == name and tp == statmod.S_IFDIR:
                          return []
                      if stat:
                          mtime = entry.mtime.tv_sec
                          # Replace any file-type bits of accessmask with tp.
                          mode = (entry.accessmask & ~lib.S_IFMT) | tp
                          ret.append((name, tp,
                                      stat_res(st_mode=mode, st_mtime=mtime,
                                               st_size=entry.datalength)))
                      else:
                          ret.append((name, tp))
              return ret

          def listdir(path, stat=False, skip=None):
              """List directory ``path`` through getattrlistbulk().

              Builds the attribute request, opens ``path`` read-only and
              returns whatever listdirinternal() produces for it; ``stat``
              and ``skip`` are forwarded to listdirinternal() unchanged.

              Raises OSError when ``path`` cannot be opened.  The descriptor
              is always closed, and failures while closing are ignored.
              """
              attrreq = ffi.new("struct attrlist*")
              attrreq.bitmapcount = lib.ATTR_BIT_MAP_COUNT
              common = lib.ATTR_CMN_RETURNED_ATTRS
              common |= lib.ATTR_CMN_NAME
              common |= lib.ATTR_CMN_OBJTYPE
              common |= lib.ATTR_CMN_ACCESSMASK
              common |= lib.ATTR_CMN_MODTIME
              attrreq.commonattr = common
              attrreq.fileattr = lib.ATTR_FILE_DATALENGTH

              dirfd = lib.open(path, lib.O_RDONLY, 0)
              if dirfd == -1:
                  raise OSError(ffi.errno, os.strerror(ffi.errno))

              try:
                  return listdirinternal(dirfd, attrreq, stat, skip)
              finally:
                  try:
                      lib.close(dirfd)
                  except BaseException:
                      # we ignore all the errors from closing, not
                      # much we can do about that
                      pass

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

				# osutil.py - CFFI version of osutil.c
				#
				# Copyright 2016 Maciej Fijalkowski <fijall@gmail.com>
				#
				# This software may be used and distributed according to the terms of the
				# GNU General Public License version 2 or any later version.

				from __future__ import absolute_import

				import os
				import stat as statmod

				from ..pure.osutil import *

				from .. import (
				pycompat,
				)

				if pycompat.isdarwin:
				from . import _osutil

				# Shorthand for the two objects exposed by the _osutil module
				# imported just above; every C call below goes through them.
				ffi = _osutil.ffi
				lib = _osutil.lib

				listdir_batch_size = 4096
				# Tweakable: size in bytes of the buffer handed to getattrlistbulk
				# on each call; it only affects performance.

				attrkinds = [None] * 20 # we need the max no for enum VXXX, 20 is plenty

				# Lookup table indexed by a lib.VXXX value (read from a record's
				# obj_type in listdirinternal) giving the matching stat.S_IF*
				# constant. Slots without an assignment below stay None.
				attrkinds[lib.VREG] = statmod.S_IFREG
				attrkinds[lib.VDIR] = statmod.S_IFDIR
				attrkinds[lib.VLNK] = statmod.S_IFLNK
				attrkinds[lib.VBLK] = statmod.S_IFBLK
				attrkinds[lib.VCHR] = statmod.S_IFCHR
				attrkinds[lib.VFIFO] = statmod.S_IFIFO
				attrkinds[lib.VSOCK] = statmod.S_IFSOCK

				class stat_res(object):
				    """Small record holding the stat fields collected by listdir.

				    It carries exactly three attributes, named like the matching
				    fields of an os.stat() result: st_mode, st_mtime and st_size.
				    """

				    def __init__(self, st_mode, st_mtime, st_size):
				        # Plain attribute storage; no validation or conversion.
				        self.st_mode, self.st_mtime, self.st_size = (
				            st_mode, st_mtime, st_size)

				# Byte offset of tv_sec inside struct timespec.
				# NOTE(review): not referenced anywhere else in the visible code.
				tv_sec_ofs = ffi.offsetof("struct timespec", "tv_sec")
				# Single module-level buffer of listdir_batch_size bytes, allocated
				# once and reused by every getattrlistbulk call in listdirinternal.
				buf = ffi.new("char[]", listdir_batch_size)

				def listdirinternal(dfd, req, stat, skip):
				    """Read every entry of the open directory ``dfd``.

				    Calls getattrlistbulk() repeatedly, filling the module-level
				    ``buf``, until it reports zero entries, and decodes each record
				    that comes back.

				    dfd  -- directory file descriptor passed to getattrlistbulk()
				    req  -- attribute request (``struct attrlist*``) passed through
				            to getattrlistbulk()
				    stat -- when true, entries are ``(name, kind, stat_res)``
				            tuples; otherwise they are ``(name, kind)`` tuples
				    skip -- if an entry with this name is a directory, the listing
				            is abandoned and ``[]`` is returned

				    ``kind`` is the ``stat.S_IF*`` constant found in ``attrkinds``
				    for the record's obj_type.

				    Raises OSError when getattrlistbulk() returns -1.
				    """
				    ret = []
				    # Loop-invariant: offset of the name reference inside a record.
				    base_ofs = ffi.offsetof('val_attrs_t', 'name_info')
				    while True:
				        r = lib.getattrlistbulk(dfd, req, buf, listdir_batch_size, 0)
				        if r == 0:
				            break
				        if r == -1:
				            # Read errno once so code and message always agree.
				            err = ffi.errno
				            raise OSError(err, os.strerror(err))
				        cur = ffi.cast("val_attrs_t*", buf)
				        for _ in range(r):
				            entry = cur
				            lgt = entry.length
				            assert lgt == ffi.cast('uint32_t*', entry)[0]
				            # Step to the next record immediately.  Doing it at the
				            # end of the body meant the "continue" below left cur
				            # on the same record for the following iteration.
				            cur = ffi.cast("val_attrs_t*",
				                           int(ffi.cast("intptr_t", entry)) + lgt)
				            ofs = entry.name_info.attr_dataoffset
				            str_lgt = entry.name_info.attr_length
				            # attr_dataoffset is relative to the name_info field;
				            # the last byte of the name is dropped (presumably the
				            # terminating NUL, see getattrlist(2)).
				            name = str(ffi.buffer(
				                ffi.cast("char*", entry) + base_ofs + ofs,
				                str_lgt - 1))
				            tp = attrkinds[entry.obj_type]
				            if name == "." or name == "..":
				                continue
				            # tp is an S_IF* constant, so compare with S_IFDIR.
				            # S_ISDIR is a function and never equals tp, which made
				            # this short-circuit unreachable.
				            if skip == name and tp == statmod.S_IFDIR:
				                return []
				            if stat:
				                mtime = entry.mtime.tv_sec
				                # Replace any file-type bits of accessmask with tp.
				                mode = (entry.accessmask & ~lib.S_IFMT) | tp
				                ret.append((name, tp,
				                            stat_res(st_mode=mode, st_mtime=mtime,
				                                     st_size=entry.datalength)))
				            else:
				                ret.append((name, tp))
				    return ret

				def listdir(path, stat=False, skip=None):
				    """List directory ``path`` through getattrlistbulk().

				    Builds the attribute request, opens ``path`` read-only and
				    returns whatever listdirinternal() produces for it; ``stat``
				    and ``skip`` are forwarded to listdirinternal() unchanged.

				    Raises OSError when ``path`` cannot be opened.  The descriptor
				    is always closed, and failures while closing are ignored.
				    """
				    req = ffi.new("struct attrlist*")
				    req.bitmapcount = lib.ATTR_BIT_MAP_COUNT
				    # Plain bitwise OR of the requested common attributes (the
				    # escaped "\|" sequences were not valid Python).
				    req.commonattr = (lib.ATTR_CMN_RETURNED_ATTRS |
				                      lib.ATTR_CMN_NAME |
				                      lib.ATTR_CMN_OBJTYPE |
				                      lib.ATTR_CMN_ACCESSMASK |
				                      lib.ATTR_CMN_MODTIME)
				    req.fileattr = lib.ATTR_FILE_DATALENGTH
				    dfd = lib.open(path, lib.O_RDONLY, 0)
				    if dfd == -1:
				        raise OSError(ffi.errno, os.strerror(ffi.errno))

				    try:
				        ret = listdirinternal(dfd, req, stat, skip)
				    finally:
				        try:
				            lib.close(dfd)
				        except BaseException:
				            pass # we ignore all the errors from closing, not
				            # much we can do about that
				    return ret