py3: pass unicode strings to hasattr() throughout...
Martin von Zweigbergk
r52022:1625fe80 default
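
On Python 3, hasattr() and getattr() only accept str attribute names; passing a bytes literal raises TypeError rather than returning False. The diff below therefore drops the b prefix from the attribute names at each hasattr() call site. A minimal standalone sketch of the behaviour (not part of the commit; the class and attribute names mirror the cvsps hunk):

class LogEntry:
    branchpoints = set()
    commitid = None
    mergepoint = None

e = LogEntry()

# str attribute names behave as expected
assert hasattr(e, 'branchpoints') and hasattr(e, 'commitid')

# bytes attribute names do not quietly return False -- Python 3 raises
try:
    hasattr(e, b'branchpoints')
except TypeError as exc:
    print('TypeError:', exc)  # attribute name must be string
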
@@ -1,1071 +1,1071 b''
1 1 # Mercurial built-in replacement for cvsps.
2 2 #
3 3 # Copyright 2008, Frank Kingswood <frank@kingswood-consulting.co.uk>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 import functools
9 9 import os
10 10 import pickle
11 11 import re
12 12
13 13 from mercurial.i18n import _
14 14 from mercurial.pycompat import open
15 15 from mercurial import (
16 16 encoding,
17 17 error,
18 18 hook,
19 19 pycompat,
20 20 util,
21 21 )
22 22 from mercurial.utils import (
23 23 dateutil,
24 24 procutil,
25 25 stringutil,
26 26 )
27 27
28 28
29 29 class logentry:
30 30 """Class logentry has the following attributes:
31 31 .author - author name as CVS knows it
32 32 .branch - name of branch this revision is on
33 33 .branches - revision tuple of branches starting at this revision
34 34 .comment - commit message
35 35 .commitid - CVS commitid or None
36 36 .date - the commit date as a (time, tz) tuple
37 37 .dead - true if file revision is dead
38 38 .file - Name of file
39 39 .lines - a tuple (+lines, -lines) or None
40 40 .parent - Previous revision of this entry
41 41 .rcs - name of file as returned from CVS
42 42 .revision - revision number as tuple
43 43 .tags - list of tags on the file
44 44 .synthetic - is this a synthetic "file ... added on ..." revision?
45 45 .mergepoint - the branch that has been merged from (if present in
46 46 rlog output) or None
47 47 .branchpoints - the branches that start at the current entry or empty
48 48 """
49 49
50 50 def __init__(self, **entries):
51 51 self.synthetic = False
52 52 self.__dict__.update(entries)
53 53
54 54 def __repr__(self):
55 55 items = ("%s=%r" % (k, self.__dict__[k]) for k in sorted(self.__dict__))
56 56 return "%s(%s)" % (type(self).__name__, ", ".join(items))
57 57
58 58
59 59 class logerror(Exception):
60 60 pass
61 61
62 62
63 63 def getrepopath(cvspath):
64 64 """Return the repository path from a CVS path.
65 65
66 66 >>> getrepopath(b'/foo/bar')
67 67 '/foo/bar'
68 68 >>> getrepopath(b'c:/foo/bar')
69 69 '/foo/bar'
70 70 >>> getrepopath(b':pserver:10/foo/bar')
71 71 '/foo/bar'
72 72 >>> getrepopath(b':pserver:10c:/foo/bar')
73 73 '/foo/bar'
74 74 >>> getrepopath(b':pserver:/foo/bar')
75 75 '/foo/bar'
76 76 >>> getrepopath(b':pserver:c:/foo/bar')
77 77 '/foo/bar'
78 78 >>> getrepopath(b':pserver:truc@foo.bar:/foo/bar')
79 79 '/foo/bar'
80 80 >>> getrepopath(b':pserver:truc@foo.bar:c:/foo/bar')
81 81 '/foo/bar'
82 82 >>> getrepopath(b'user@server/path/to/repository')
83 83 '/path/to/repository'
84 84 """
85 85 # According to the CVS manual, CVS paths are expressed like:
86 86 # [:method:][[user][:password]@]hostname[:[port]]/path/to/repository
87 87 #
88 88 # The CVS path is split into parts, and the position of the first '/'
89 89 # after the '@' is located. The result is the rest of the string from
90 90 # that '/' onwards, including the '/' itself.
91 91
92 92 parts = cvspath.split(b':')
93 93 atposition = parts[-1].find(b'@')
94 94 start = 0
95 95
96 96 if atposition != -1:
97 97 start = atposition
98 98
99 99 repopath = parts[-1][parts[-1].find(b'/', start) :]
100 100 return repopath
101 101
102 102
103 103 def createlog(ui, directory=None, root=b"", rlog=True, cache=None):
104 104 '''Collect the CVS rlog'''
105 105
106 106 # Because we store many duplicate commit log messages, reusing strings
107 107 # saves a lot of memory and pickle storage space.
108 108 _scache = {}
109 109
110 110 def scache(s):
111 111 """return a shared version of a string"""
112 112 return _scache.setdefault(s, s)
113 113
114 114 ui.status(_(b'collecting CVS rlog\n'))
115 115
116 116 log = [] # list of logentry objects containing the CVS state
117 117
118 118 # patterns to match in CVS (r)log output, by state of use
119 119 re_00 = re.compile(b'RCS file: (.+)$')
120 120 re_01 = re.compile(b'cvs \\[r?log aborted\\]: (.+)$')
121 121 re_02 = re.compile(b'cvs (r?log|server): (.+)\n$')
122 122 re_03 = re.compile(
123 123 b"(Cannot access.+CVSROOT)|(can't create temporary directory.+)$"
124 124 )
125 125 re_10 = re.compile(b'Working file: (.+)$')
126 126 re_20 = re.compile(b'symbolic names:')
127 127 re_30 = re.compile(b'\t(.+): ([\\d.]+)$')
128 128 re_31 = re.compile(b'----------------------------$')
129 129 re_32 = re.compile(
130 130 b'======================================='
131 131 b'======================================$'
132 132 )
133 133 re_50 = re.compile(br'revision ([\d.]+)(\s+locked by:\s+.+;)?$')
134 134 re_60 = re.compile(
135 135 br'date:\s+(.+);\s+author:\s+(.+);\s+state:\s+(.+?);'
136 136 br'(\s+lines:\s+(\+\d+)?\s+(-\d+)?;)?'
137 137 br'(\s+commitid:\s+([^;]+);)?'
138 138 br'(.*mergepoint:\s+([^;]+);)?'
139 139 )
140 140 re_70 = re.compile(b'branches: (.+);$')
141 141
142 142 file_added_re = re.compile(br'file [^/]+ was (initially )?added on branch')
143 143
144 144 prefix = b'' # leading path to strip off what we get from CVS
145 145
146 146 if directory is None:
147 147 # Current working directory
148 148
149 149 # Get the real directory in the repository
150 150 try:
151 151 with open(os.path.join(b'CVS', b'Repository'), b'rb') as f:
152 152 prefix = f.read().strip()
153 153 directory = prefix
154 154 if prefix == b".":
155 155 prefix = b""
156 156 except IOError:
157 157 raise logerror(_(b'not a CVS sandbox'))
158 158
159 159 if prefix and not prefix.endswith(pycompat.ossep):
160 160 prefix += pycompat.ossep
161 161
162 162 # Use the Root file in the sandbox, if it exists
163 163 try:
164 164 root = open(os.path.join(b'CVS', b'Root'), b'rb').read().strip()
165 165 except IOError:
166 166 pass
167 167
168 168 if not root:
169 169 root = encoding.environ.get(b'CVSROOT', b'')
170 170
171 171 # read log cache if one exists
172 172 oldlog = []
173 173 date = None
174 174
175 175 if cache:
176 176 cachedir = os.path.expanduser(b'~/.hg.cvsps')
177 177 if not os.path.exists(cachedir):
178 178 os.mkdir(cachedir)
179 179
180 180 # The cvsps cache pickle needs a uniquified name, based on the
181 181 # repository location. The address may have all sorts of nasties
182 182 # in it, slashes, colons and such. So here we take just the
183 183 # alphanumeric characters, concatenated in a way that does not
184 184 # mix up the various components, so that
185 185 # :pserver:user@server:/path
186 186 # and
187 187 # /pserver/user/server/path
188 188 # are mapped to different cache file names.
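# For example (illustrative trace of the expression below): a root of
# ':pserver:user@server:/path' and a directory of 'module' yield the
# cache file ~/.hg.cvsps/pserver.user-server.path.module.cache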
189 189 cachefile = root.split(b":") + [directory, b"cache"]
190 190 cachefile = [b'-'.join(re.findall(br'\w+', s)) for s in cachefile if s]
191 191 cachefile = os.path.join(
192 192 cachedir, b'.'.join([s for s in cachefile if s])
193 193 )
194 194
195 195 if cache == b'update':
196 196 try:
197 197 ui.note(_(b'reading cvs log cache %s\n') % cachefile)
198 198 oldlog = pickle.load(open(cachefile, b'rb'))
199 199 for e in oldlog:
200 200 if not (
201 hasattr(e, b'branchpoints')
202 and hasattr(e, b'commitid')
203 and hasattr(e, b'mergepoint')
201 hasattr(e, 'branchpoints')
202 and hasattr(e, 'commitid')
203 and hasattr(e, 'mergepoint')
204 204 ):
205 205 ui.status(_(b'ignoring old cache\n'))
206 206 oldlog = []
207 207 break
208 208
209 209 ui.note(_(b'cache has %d log entries\n') % len(oldlog))
210 210 except Exception as e:
211 211 ui.note(_(b'error reading cache: %r\n') % e)
212 212
213 213 if oldlog:
214 214 date = oldlog[-1].date # last commit date as a (time,tz) tuple
215 215 date = dateutil.datestr(date, b'%Y/%m/%d %H:%M:%S %1%2')
216 216
217 217 # build the CVS commandline
218 218 cmd = [b'cvs', b'-q']
219 219 if root:
220 220 cmd.append(b'-d%s' % root)
221 221 p = util.normpath(getrepopath(root))
222 222 if not p.endswith(b'/'):
223 223 p += b'/'
224 224 if prefix:
225 225 # looks like normpath replaces "" by "."
226 226 prefix = p + util.normpath(prefix)
227 227 else:
228 228 prefix = p
229 229 cmd.append([b'log', b'rlog'][rlog])
230 230 if date:
231 231 # no space between option and date string
232 232 cmd.append(b'-d>%s' % date)
233 233 cmd.append(directory)
234 234
235 235 # state machine begins here
236 236 tags = {} # dictionary of revisions on current file with their tags
237 237 branchmap = {} # mapping between branch names and revision numbers
238 238 rcsmap = {}
239 239 state = 0
240 240 store = False # set when a new record can be appended
241 241
242 242 cmd = [procutil.shellquote(arg) for arg in cmd]
243 243 ui.note(_(b"running %s\n") % (b' '.join(cmd)))
244 244 ui.debug(b"prefix=%r directory=%r root=%r\n" % (prefix, directory, root))
245 245
246 246 pfp = procutil.popen(b' '.join(cmd), b'rb')
247 247 peek = util.fromnativeeol(pfp.readline())
248 248 while True:
249 249 line = peek
250 250 if line == b'':
251 251 break
252 252 peek = util.fromnativeeol(pfp.readline())
253 253 if line.endswith(b'\n'):
254 254 line = line[:-1]
255 255 # ui.debug('state=%d line=%r\n' % (state, line))
256 256
257 257 if state == 0:
258 258 # initial state, consume input until we see 'RCS file'
259 259 match = re_00.match(line)
260 260 if match:
261 261 rcs = match.group(1)
262 262 tags = {}
263 263 if rlog:
264 264 filename = util.normpath(rcs[:-2])
265 265 if filename.startswith(prefix):
266 266 filename = filename[len(prefix) :]
267 267 if filename.startswith(b'/'):
268 268 filename = filename[1:]
269 269 if filename.startswith(b'Attic/'):
270 270 filename = filename[6:]
271 271 else:
272 272 filename = filename.replace(b'/Attic/', b'/')
273 273 state = 2
274 274 continue
275 275 state = 1
276 276 continue
277 277 match = re_01.match(line)
278 278 if match:
279 279 raise logerror(match.group(1))
280 280 match = re_02.match(line)
281 281 if match:
282 282 raise logerror(match.group(2))
283 283 if re_03.match(line):
284 284 raise logerror(line)
285 285
286 286 elif state == 1:
287 287 # expect 'Working file' (only when using log instead of rlog)
288 288 match = re_10.match(line)
289 289 assert match, _(b'RCS file must be followed by working file')
290 290 filename = util.normpath(match.group(1))
291 291 state = 2
292 292
293 293 elif state == 2:
294 294 # expect 'symbolic names'
295 295 if re_20.match(line):
296 296 branchmap = {}
297 297 state = 3
298 298
299 299 elif state == 3:
300 300 # read the symbolic names and store as tags
301 301 match = re_30.match(line)
302 302 if match:
303 303 rev = [int(x) for x in match.group(2).split(b'.')]
304 304
305 305 # Convert magic branch number to an odd-numbered one
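# e.g. (illustrative) the magic branch number 1.2.0.4 becomes 1.2.4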
306 306 revn = len(rev)
307 307 if revn > 3 and (revn % 2) == 0 and rev[-2] == 0:
308 308 rev = rev[:-2] + rev[-1:]
309 309 rev = tuple(rev)
310 310
311 311 if rev not in tags:
312 312 tags[rev] = []
313 313 tags[rev].append(match.group(1))
314 314 branchmap[match.group(1)] = match.group(2)
315 315
316 316 elif re_31.match(line):
317 317 state = 5
318 318 elif re_32.match(line):
319 319 state = 0
320 320
321 321 elif state == 4:
322 322 # expecting '------' separator before first revision
323 323 if re_31.match(line):
324 324 state = 5
325 325 else:
326 326 assert not re_32.match(line), _(
327 327 b'must have at least some revisions'
328 328 )
329 329
330 330 elif state == 5:
331 331 # expecting revision number and possibly (ignored) lock indication
332 332 # we create the logentry here from values stored in states 0 to 4,
333 333 # as this state is re-entered for subsequent revisions of a file.
334 334 match = re_50.match(line)
335 335 assert match, _(b'expected revision number')
336 336 e = logentry(
337 337 rcs=scache(rcs),
338 338 file=scache(filename),
339 339 revision=tuple([int(x) for x in match.group(1).split(b'.')]),
340 340 branches=[],
341 341 parent=None,
342 342 commitid=None,
343 343 mergepoint=None,
344 344 branchpoints=set(),
345 345 )
346 346
347 347 state = 6
348 348
349 349 elif state == 6:
350 350 # expecting date, author, state, lines changed
351 351 match = re_60.match(line)
352 352 assert match, _(b'revision must be followed by date line')
353 353 d = match.group(1)
354 354 if d[2] == b'/':
355 355 # Y2K
356 356 d = b'19' + d
357 357
358 358 if len(d.split()) != 3:
359 359 # cvs log dates always in GMT
360 360 d = d + b' UTC'
361 361 e.date = dateutil.parsedate(
362 362 d,
363 363 [
364 364 b'%y/%m/%d %H:%M:%S',
365 365 b'%Y/%m/%d %H:%M:%S',
366 366 b'%Y-%m-%d %H:%M:%S',
367 367 ],
368 368 )
369 369 e.author = scache(match.group(2))
370 370 e.dead = match.group(3).lower() == b'dead'
371 371
372 372 if match.group(5):
373 373 if match.group(6):
374 374 e.lines = (int(match.group(5)), int(match.group(6)))
375 375 else:
376 376 e.lines = (int(match.group(5)), 0)
377 377 elif match.group(6):
378 378 e.lines = (0, int(match.group(6)))
379 379 else:
380 380 e.lines = None
381 381
382 382 if match.group(7): # cvs 1.12 commitid
383 383 e.commitid = match.group(8)
384 384
385 385 if match.group(9): # cvsnt mergepoint
386 386 myrev = match.group(10).split(b'.')
387 387 if len(myrev) == 2: # head
388 388 e.mergepoint = b'HEAD'
389 389 else:
390 390 myrev = b'.'.join(myrev[:-2] + [b'0', myrev[-2]])
391 391 branches = [b for b in branchmap if branchmap[b] == myrev]
392 392 assert len(branches) == 1, (
393 393 b'unknown branch: %s' % e.mergepoint
394 394 )
395 395 e.mergepoint = branches[0]
396 396
397 397 e.comment = []
398 398 state = 7
399 399
400 400 elif state == 7:
401 401 # read the revision numbers of branches that start at this revision
402 402 # or store the commit log message otherwise
403 403 m = re_70.match(line)
404 404 if m:
405 405 e.branches = [
406 406 tuple([int(y) for y in x.strip().split(b'.')])
407 407 for x in m.group(1).split(b';')
408 408 ]
409 409 state = 8
410 410 elif re_31.match(line) and re_50.match(peek):
411 411 state = 5
412 412 store = True
413 413 elif re_32.match(line):
414 414 state = 0
415 415 store = True
416 416 else:
417 417 e.comment.append(line)
418 418
419 419 elif state == 8:
420 420 # store commit log message
421 421 if re_31.match(line):
422 422 cpeek = peek
423 423 if cpeek.endswith(b'\n'):
424 424 cpeek = cpeek[:-1]
425 425 if re_50.match(cpeek):
426 426 state = 5
427 427 store = True
428 428 else:
429 429 e.comment.append(line)
430 430 elif re_32.match(line):
431 431 state = 0
432 432 store = True
433 433 else:
434 434 e.comment.append(line)
435 435
436 436 # When a file is added on a branch B1, CVS creates a synthetic
437 437 # dead trunk revision 1.1 so that the branch has a root.
438 438 # Likewise, if you merge such a file to a later branch B2 (one
439 439 # that already existed when the file was added on B1), CVS
440 440 # creates a synthetic dead revision 1.1.x.1 on B2. Don't drop
441 441 # these revisions now, but mark them synthetic so
442 442 # createchangeset() can take care of them.
443 443 if (
444 444 store
445 445 and e.dead
446 446 and e.revision[-1] == 1
447 447 and len(e.comment) == 1 # 1.1 or 1.1.x.1
448 448 and file_added_re.match(e.comment[0])
449 449 ):
450 450 ui.debug(
451 451 b'found synthetic revision in %s: %r\n' % (e.rcs, e.comment[0])
452 452 )
453 453 e.synthetic = True
454 454
455 455 if store:
456 456 # clean up the results and save in the log.
457 457 store = False
458 458 e.tags = sorted([scache(x) for x in tags.get(e.revision, [])])
459 459 e.comment = scache(b'\n'.join(e.comment))
460 460
461 461 revn = len(e.revision)
462 462 if revn > 3 and (revn % 2) == 0:
463 463 e.branch = tags.get(e.revision[:-1], [None])[0]
464 464 else:
465 465 e.branch = None
466 466
467 467 # find the branches starting from this revision
468 468 branchpoints = set()
469 469 for branch, revision in branchmap.items():
470 470 revparts = tuple([int(i) for i in revision.split(b'.')])
471 471 if len(revparts) < 2: # bad tags
472 472 continue
473 473 if revparts[-2] == 0 and revparts[-1] % 2 == 0:
474 474 # normal branch
475 475 if revparts[:-2] == e.revision:
476 476 branchpoints.add(branch)
477 477 elif revparts == (1, 1, 1): # vendor branch
478 478 if revparts in e.branches:
479 479 branchpoints.add(branch)
480 480 e.branchpoints = branchpoints
481 481
482 482 log.append(e)
483 483
484 484 rcsmap[e.rcs.replace(b'/Attic/', b'/')] = e.rcs
485 485
486 486 if len(log) % 100 == 0:
487 487 ui.status(
488 488 stringutil.ellipsis(b'%d %s' % (len(log), e.file), 80)
489 489 + b'\n'
490 490 )
491 491
492 492 log.sort(key=lambda x: (x.rcs, x.revision))
493 493
494 494 # find parent revisions of individual files
495 495 versions = {}
496 496 for e in sorted(oldlog, key=lambda x: (x.rcs, x.revision)):
497 497 rcs = e.rcs.replace(b'/Attic/', b'/')
498 498 if rcs in rcsmap:
499 499 e.rcs = rcsmap[rcs]
500 500 branch = e.revision[:-1]
501 501 versions[(e.rcs, branch)] = e.revision
502 502
503 503 for e in log:
504 504 branch = e.revision[:-1]
505 505 p = versions.get((e.rcs, branch), None)
506 506 if p is None:
507 507 p = e.revision[:-2]
508 508 e.parent = p
509 509 versions[(e.rcs, branch)] = e.revision
510 510
511 511 # update the log cache
512 512 if cache:
513 513 if log:
514 514 # join up the old and new logs
515 515 log.sort(key=lambda x: x.date)
516 516
517 517 if oldlog and oldlog[-1].date >= log[0].date:
518 518 raise logerror(
519 519 _(
520 520 b'log cache overlaps with new log entries,'
521 521 b' re-run without cache.'
522 522 )
523 523 )
524 524
525 525 log = oldlog + log
526 526
527 527 # write the new cachefile
528 528 ui.note(_(b'writing cvs log cache %s\n') % cachefile)
529 529 pickle.dump(log, open(cachefile, b'wb'))
530 530 else:
531 531 log = oldlog
532 532
533 533 ui.status(_(b'%d log entries\n') % len(log))
534 534
535 535 encodings = ui.configlist(b'convert', b'cvsps.logencoding')
536 536 if encodings:
537 537
538 538 def revstr(r):
539 539 # this is needed, because logentry.revision is a tuple of "int"
540 540 # (e.g. (1, 2) for "1.2")
541 541 return b'.'.join(pycompat.maplist(pycompat.bytestr, r))
542 542
543 543 for entry in log:
544 544 comment = entry.comment
545 545 for e in encodings:
546 546 try:
547 547 entry.comment = comment.decode(pycompat.sysstr(e)).encode(
548 548 'utf-8'
549 549 )
550 550 if ui.debugflag:
551 551 ui.debug(
552 552 b"transcoding by %s: %s of %s\n"
553 553 % (e, revstr(entry.revision), entry.file)
554 554 )
555 555 break
556 556 except UnicodeDecodeError:
557 557 pass # try next encoding
558 558 except LookupError as inst: # unknown encoding, maybe
559 559 raise error.Abort(
560 560 pycompat.bytestr(inst),
561 561 hint=_(
562 562 b'check convert.cvsps.logencoding configuration'
563 563 ),
564 564 )
565 565 else:
566 566 raise error.Abort(
567 567 _(
568 568 b"no encoding can transcode"
569 569 b" CVS log message for %s of %s"
570 570 )
571 571 % (revstr(entry.revision), entry.file),
572 572 hint=_(b'check convert.cvsps.logencoding configuration'),
573 573 )
574 574
575 575 hook.hook(ui, None, b"cvslog", True, log=log)
576 576
577 577 return log
578 578
579 579
580 580 class changeset:
581 581 """Class changeset has the following attributes:
582 582 .id - integer identifying this changeset (list index)
583 583 .author - author name as CVS knows it
584 584 .branch - name of branch this changeset is on, or None
585 585 .comment - commit message
586 586 .commitid - CVS commitid or None
587 587 .date - the commit date as a (time,tz) tuple
588 588 .entries - list of logentry objects in this changeset
589 589 .parents - list of one or two parent changesets
590 590 .tags - list of tags on this changeset
591 591 .synthetic - from synthetic revision "file ... added on branch ..."
592 592 .mergepoint- the branch that has been merged from or None
593 593 .branchpoints- the branches that start at the current entry or empty
594 594 """
595 595
596 596 def __init__(self, **entries):
597 597 self.id = None
598 598 self.synthetic = False
599 599 self.__dict__.update(entries)
600 600
601 601 def __repr__(self):
602 602 items = (
603 603 b"%s=%r" % (k, self.__dict__[k]) for k in sorted(self.__dict__)
604 604 )
605 605 return b"%s(%s)" % (type(self).__name__, b", ".join(items))
606 606
607 607
608 608 def createchangeset(ui, log, fuzz=60, mergefrom=None, mergeto=None):
609 609 '''Convert log into changesets.'''
610 610
611 611 ui.status(_(b'creating changesets\n'))
612 612
613 613 # try to order commitids by date
614 614 mindate = {}
615 615 for e in log:
616 616 if e.commitid:
617 617 if e.commitid not in mindate:
618 618 mindate[e.commitid] = e.date
619 619 else:
620 620 mindate[e.commitid] = min(e.date, mindate[e.commitid])
621 621
622 622 # Merge changesets
623 623 log.sort(
624 624 key=lambda x: (
625 625 mindate.get(x.commitid, (-1, 0)),
626 626 x.commitid or b'',
627 627 x.comment,
628 628 x.author,
629 629 x.branch or b'',
630 630 x.date,
631 631 x.branchpoints,
632 632 )
633 633 )
634 634
635 635 changesets = []
636 636 files = set()
637 637 c = None
638 638 for i, e in enumerate(log):
639 639
640 640 # Check if log entry belongs to the current changeset or not.
641 641
642 642 # Since CVS is file-centric, two different file revisions with
643 643 # different branchpoints should be treated as belonging to two
644 644 # different changesets (and the ordering is important and not
645 645 # honoured by cvsps at this point).
646 646 #
647 647 # Consider the following case:
648 648 # foo 1.1 branchpoints: [MYBRANCH]
649 649 # bar 1.1 branchpoints: [MYBRANCH, MYBRANCH2]
650 650 #
651 651 # Here foo is part only of MYBRANCH, but not MYBRANCH2, e.g. a
652 652 # later version of foo may be in MYBRANCH2, so foo should be the
653 653 # first changeset and bar the next and MYBRANCH and MYBRANCH2
654 654 # should both start off of the bar changeset. No provisions are
655 655 # made to ensure that this is, in fact, what happens.
656 656 if not (
657 657 c
658 658 and e.branchpoints == c.branchpoints
659 659 and ( # cvs commitids
660 660 (e.commitid is not None and e.commitid == c.commitid)
661 661 or ( # no commitids, use fuzzy commit detection
662 662 (e.commitid is None or c.commitid is None)
663 663 and e.comment == c.comment
664 664 and e.author == c.author
665 665 and e.branch == c.branch
666 666 and (
667 667 (c.date[0] + c.date[1])
668 668 <= (e.date[0] + e.date[1])
669 669 <= (c.date[0] + c.date[1]) + fuzz
670 670 )
671 671 and e.file not in files
672 672 )
673 673 )
674 674 ):
675 675 c = changeset(
676 676 comment=e.comment,
677 677 author=e.author,
678 678 branch=e.branch,
679 679 date=e.date,
680 680 entries=[],
681 681 mergepoint=e.mergepoint,
682 682 branchpoints=e.branchpoints,
683 683 commitid=e.commitid,
684 684 )
685 685 changesets.append(c)
686 686
687 687 files = set()
688 688 if len(changesets) % 100 == 0:
689 689 t = b'%d %s' % (
690 690 len(changesets),
691 691 pycompat.byterepr(e.comment)[2:-1],
692 692 )
693 693 ui.status(stringutil.ellipsis(t, 80) + b'\n')
694 694
695 695 c.entries.append(e)
696 696 files.add(e.file)
697 697 c.date = e.date # changeset date is date of latest commit in it
698 698
699 699 # Mark synthetic changesets
700 700
701 701 for c in changesets:
702 702 # Synthetic revisions always get their own changeset, because
703 703 # the log message includes the filename. E.g. if you add file3
704 704 # and file4 on a branch, you get four log entries and three
705 705 # changesets:
706 706 # "File file3 was added on branch ..." (synthetic, 1 entry)
707 707 # "File file4 was added on branch ..." (synthetic, 1 entry)
708 708 # "Add file3 and file4 to fix ..." (real, 2 entries)
709 709 # Hence the check for 1 entry here.
710 710 c.synthetic = len(c.entries) == 1 and c.entries[0].synthetic
711 711
712 712 # Sort files in each changeset
713 713
714 714 def entitycompare(l, r):
715 715 """Mimic cvsps sorting order"""
716 716 l = l.file.split(b'/')
717 717 r = r.file.split(b'/')
718 718 nl = len(l)
719 719 nr = len(r)
720 720 n = min(nl, nr)
721 721 for i in range(n):
722 722 if i + 1 == nl and nl < nr:
723 723 return -1
724 724 elif i + 1 == nr and nl > nr:
725 725 return +1
726 726 elif l[i] < r[i]:
727 727 return -1
728 728 elif l[i] > r[i]:
729 729 return +1
730 730 return 0
731 731
732 732 for c in changesets:
733 733 c.entries.sort(key=functools.cmp_to_key(entitycompare))
734 734
735 735 # Sort changesets by date
736 736
737 737 odd = set()
738 738
739 739 def cscmp(l, r):
740 740 d = sum(l.date) - sum(r.date)
741 741 if d:
742 742 return d
743 743
744 744 # detect vendor branches and initial commits on a branch
745 745 le = {}
746 746 for e in l.entries:
747 747 le[e.rcs] = e.revision
748 748 re = {}
749 749 for e in r.entries:
750 750 re[e.rcs] = e.revision
751 751
752 752 d = 0
753 753 for e in l.entries:
754 754 if re.get(e.rcs, None) == e.parent:
755 755 assert not d
756 756 d = 1
757 757 break
758 758
759 759 for e in r.entries:
760 760 if le.get(e.rcs, None) == e.parent:
761 761 if d:
762 762 odd.add((l, r))
763 763 d = -1
764 764 break
765 765 # By this point, the changesets are sufficiently compared that
766 766 # we don't really care about ordering. However, this leaves
767 767 # some race conditions in the tests, so we compare on the
768 768 # number of files modified, the files contained in each
769 769 # changeset, and the branchpoints in the change to ensure test
770 770 # output remains stable.
771 771
772 772 # recommended replacement for cmp from
773 773 # https://docs.python.org/3.0/whatsnew/3.0.html
774 774 c = lambda x, y: (x > y) - (x < y)
775 775 # Sort bigger changes first.
776 776 if not d:
777 777 d = c(len(l.entries), len(r.entries))
778 778 # Try sorting by filename in the change.
779 779 if not d:
780 780 d = c([e.file for e in l.entries], [e.file for e in r.entries])
781 781 # Try and put changes without a branch point before ones with
782 782 # a branch point.
783 783 if not d:
784 784 d = c(len(l.branchpoints), len(r.branchpoints))
785 785 return d
786 786
787 787 changesets.sort(key=functools.cmp_to_key(cscmp))
788 788
789 789 # Collect tags
790 790
791 791 globaltags = {}
792 792 for c in changesets:
793 793 for e in c.entries:
794 794 for tag in e.tags:
795 795 # remember which is the latest changeset to have this tag
796 796 globaltags[tag] = c
797 797
798 798 for c in changesets:
799 799 tags = set()
800 800 for e in c.entries:
801 801 tags.update(e.tags)
802 802 # remember tags only if this is the latest changeset to have it
803 803 c.tags = sorted(tag for tag in tags if globaltags[tag] is c)
804 804
805 805 # Find parent changesets, handle {{mergetobranch BRANCHNAME}}
806 806 # by inserting dummy changesets with two parents, and handle
807 807 # {{mergefrombranch BRANCHNAME}} by setting two parents.
808 808
809 809 if mergeto is None:
810 810 mergeto = br'{{mergetobranch ([-\w]+)}}'
811 811 if mergeto:
812 812 mergeto = re.compile(mergeto)
813 813
814 814 if mergefrom is None:
815 815 mergefrom = br'{{mergefrombranch ([-\w]+)}}'
816 816 if mergefrom:
817 817 mergefrom = re.compile(mergefrom)
818 818
819 819 versions = {} # changeset index where we saw any particular file version
820 820 branches = {} # changeset index where we saw a branch
821 821 n = len(changesets)
822 822 i = 0
823 823 while i < n:
824 824 c = changesets[i]
825 825
826 826 for f in c.entries:
827 827 versions[(f.rcs, f.revision)] = i
828 828
829 829 p = None
830 830 if c.branch in branches:
831 831 p = branches[c.branch]
832 832 else:
833 833 # first changeset on a new branch
834 834 # the parent is a changeset with the branch in its
835 835 # branchpoints such that it is the latest possible
836 836 # commit without any intervening, unrelated commits.
837 837
838 838 for candidate in range(i):
839 839 if c.branch not in changesets[candidate].branchpoints:
840 840 if p is not None:
841 841 break
842 842 continue
843 843 p = candidate
844 844
845 845 c.parents = []
846 846 if p is not None:
847 847 p = changesets[p]
848 848
849 849 # Ensure no changeset has a synthetic changeset as a parent.
850 850 while p.synthetic:
851 851 assert len(p.parents) <= 1, _(
852 852 b'synthetic changeset cannot have multiple parents'
853 853 )
854 854 if p.parents:
855 855 p = p.parents[0]
856 856 else:
857 857 p = None
858 858 break
859 859
860 860 if p is not None:
861 861 c.parents.append(p)
862 862
863 863 if c.mergepoint:
864 864 if c.mergepoint == b'HEAD':
865 865 c.mergepoint = None
866 866 c.parents.append(changesets[branches[c.mergepoint]])
867 867
868 868 if mergefrom:
869 869 m = mergefrom.search(c.comment)
870 870 if m:
871 871 m = m.group(1)
872 872 if m == b'HEAD':
873 873 m = None
874 874 try:
875 875 candidate = changesets[branches[m]]
876 876 except KeyError:
877 877 ui.warn(
878 878 _(
879 879 b"warning: CVS commit message references "
880 880 b"non-existent branch %r:\n%s\n"
881 881 )
882 882 % (pycompat.bytestr(m), c.comment)
883 883 )
884 884 if m in branches and c.branch != m and not candidate.synthetic:
885 885 c.parents.append(candidate)
886 886
887 887 if mergeto:
888 888 m = mergeto.search(c.comment)
889 889 if m:
890 890 if m.groups():
891 891 m = m.group(1)
892 892 if m == b'HEAD':
893 893 m = None
894 894 else:
895 895 m = None # if no group found then merge to HEAD
896 896 if m in branches and c.branch != m:
897 897 # insert empty changeset for merge
898 898 cc = changeset(
899 899 author=c.author,
900 900 branch=m,
901 901 date=c.date,
902 902 comment=b'convert-repo: CVS merge from branch %s'
903 903 % c.branch,
904 904 entries=[],
905 905 tags=[],
906 906 parents=[changesets[branches[m]], c],
907 907 )
908 908 changesets.insert(i + 1, cc)
909 909 branches[m] = i + 1
910 910
911 911 # adjust our loop counters now we have inserted a new entry
912 912 n += 1
913 913 i += 2
914 914 continue
915 915
916 916 branches[c.branch] = i
917 917 i += 1
918 918
919 919 # Drop synthetic changesets (safe now that we have ensured no other
920 920 # changesets can have them as parents).
921 921 i = 0
922 922 while i < len(changesets):
923 923 if changesets[i].synthetic:
924 924 del changesets[i]
925 925 else:
926 926 i += 1
927 927
928 928 # Number changesets
929 929
930 930 for i, c in enumerate(changesets):
931 931 c.id = i + 1
932 932
933 933 if odd:
934 934 for l, r in odd:
935 935 if l.id is not None and r.id is not None:
936 936 ui.warn(
937 937 _(b'changeset %d is both before and after %d\n')
938 938 % (l.id, r.id)
939 939 )
940 940
941 941 ui.status(_(b'%d changeset entries\n') % len(changesets))
942 942
943 943 hook.hook(ui, None, b"cvschangesets", True, changesets=changesets)
944 944
945 945 return changesets
946 946
947 947
948 948 def debugcvsps(ui, *args, **opts):
949 949 """Read CVS rlog for current directory or named path in
950 950 repository, and convert the log to changesets based on matching
951 951 commit log entries and dates.
952 952 """
953 953 opts = pycompat.byteskwargs(opts)
954 954 if opts[b"new_cache"]:
955 955 cache = b"write"
956 956 elif opts[b"update_cache"]:
957 957 cache = b"update"
958 958 else:
959 959 cache = None
960 960
961 961 revisions = opts[b"revisions"]
962 962
963 963 try:
964 964 if args:
965 965 log = []
966 966 for d in args:
967 967 log += createlog(ui, d, root=opts[b"root"], cache=cache)
968 968 else:
969 969 log = createlog(ui, root=opts[b"root"], cache=cache)
970 970 except logerror as e:
971 971 ui.write(b"%r\n" % e)
972 972 return
973 973
974 974 changesets = createchangeset(ui, log, opts[b"fuzz"])
975 975 del log
976 976
977 977 # Print changesets (optionally filtered)
978 978
979 979 off = len(revisions)
980 980 branches = {} # latest version number in each branch
981 981 ancestors = {} # parent branch
982 982 for cs in changesets:
983 983
984 984 if opts[b"ancestors"]:
985 985 if cs.branch not in branches and cs.parents and cs.parents[0].id:
986 986 ancestors[cs.branch] = (
987 987 changesets[cs.parents[0].id - 1].branch,
988 988 cs.parents[0].id,
989 989 )
990 990 branches[cs.branch] = cs.id
991 991
992 992 # limit by branches
993 993 if (
994 994 opts[b"branches"]
995 995 and (cs.branch or b'HEAD') not in opts[b"branches"]
996 996 ):
997 997 continue
998 998
999 999 if not off:
1000 1000 # Note: trailing spaces on several lines here are needed to have
1001 1001 # bug-for-bug compatibility with cvsps.
1002 1002 ui.write(b'---------------------\n')
1003 1003 ui.write((b'PatchSet %d \n' % cs.id))
1004 1004 ui.write(
1005 1005 (
1006 1006 b'Date: %s\n'
1007 1007 % dateutil.datestr(cs.date, b'%Y/%m/%d %H:%M:%S %1%2')
1008 1008 )
1009 1009 )
1010 1010 ui.write((b'Author: %s\n' % cs.author))
1011 1011 ui.write((b'Branch: %s\n' % (cs.branch or b'HEAD')))
1012 1012 ui.write(
1013 1013 (
1014 1014 b'Tag%s: %s \n'
1015 1015 % (
1016 1016 [b'', b's'][len(cs.tags) > 1],
1017 1017 b','.join(cs.tags) or b'(none)',
1018 1018 )
1019 1019 )
1020 1020 )
1021 1021 if cs.branchpoints:
1022 1022 ui.writenoi18n(
1023 1023 b'Branchpoints: %s \n' % b', '.join(sorted(cs.branchpoints))
1024 1024 )
1025 1025 if opts[b"parents"] and cs.parents:
1026 1026 if len(cs.parents) > 1:
1027 1027 ui.write(
1028 1028 (
1029 1029 b'Parents: %s\n'
1030 1030 % (b','.join([(b"%d" % p.id) for p in cs.parents]))
1031 1031 )
1032 1032 )
1033 1033 else:
1034 1034 ui.write((b'Parent: %d\n' % cs.parents[0].id))
1035 1035
1036 1036 if opts[b"ancestors"]:
1037 1037 b = cs.branch
1038 1038 r = []
1039 1039 while b:
1040 1040 b, c = ancestors[b]
1041 1041 r.append(b'%s:%d:%d' % (b or b"HEAD", c, branches[b]))
1042 1042 if r:
1043 1043 ui.write((b'Ancestors: %s\n' % (b','.join(r))))
1044 1044
1045 1045 ui.writenoi18n(b'Log:\n')
1046 1046 ui.write(b'%s\n\n' % cs.comment)
1047 1047 ui.writenoi18n(b'Members: \n')
1048 1048 for f in cs.entries:
1049 1049 fn = f.file
1050 1050 if fn.startswith(opts[b"prefix"]):
1051 1051 fn = fn[len(opts[b"prefix"]) :]
1052 1052 ui.write(
1053 1053 b'\t%s:%s->%s%s \n'
1054 1054 % (
1055 1055 fn,
1056 1056 b'.'.join([b"%d" % x for x in f.parent]) or b'INITIAL',
1057 1057 b'.'.join([(b"%d" % x) for x in f.revision]),
1058 1058 [b'', b'(DEAD)'][f.dead],
1059 1059 )
1060 1060 )
1061 1061 ui.write(b'\n')
1062 1062
1063 1063 # have we seen the start tag?
1064 1064 if revisions and off:
1065 1065 if revisions[0] == (b"%d" % cs.id) or revisions[0] in cs.tags:
1066 1066 off = False
1067 1067
1068 1068 # see if we reached the end tag
1069 1069 if len(revisions) > 1 and not off:
1070 1070 if revisions[1] == (b"%d" % cs.id) or revisions[1] in cs.tags:
1071 1071 break
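
The fsmonitor hunk that follows makes the same bytes-to-str conversion for its hasattr(dmap, '_map') and hasattr(self, '_fsmonitorstate') probes. Where every call site cannot be converted at once, a small shim along these lines could normalize the name first; this is a hypothetical sketch, not a helper the commit or Mercurial provides:

def hasattr_compat(obj, name):
    # Accept either bytes or str attribute names and normalize to str so
    # that hasattr() never sees bytes on Python 3 (attribute names are
    # ASCII identifiers, so the decode is safe).
    if isinstance(name, bytes):
        name = name.decode('ascii')
    return hasattr(obj, name)

The commit takes the simpler route of fixing the literals at each call site, which avoids the extra isinstance() check on hot paths.
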
@@ -1,1016 +1,1016 b''
1 1 # __init__.py - fsmonitor initialization and overrides
2 2 #
3 3 # Copyright 2013-2016 Facebook, Inc.
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 '''Faster status operations with the Watchman file monitor (EXPERIMENTAL)
9 9
10 10 Integrates the file-watching program Watchman with Mercurial to produce faster
11 11 status results.
12 12
13 13 On a particular Linux system, for a real-world repository with over 400,000
14 14 files hosted on ext4, vanilla `hg status` takes 1.3 seconds. On the same
15 15 system, with fsmonitor it takes about 0.3 seconds.
16 16
17 17 fsmonitor requires no configuration -- it will tell Watchman about your
18 18 repository as necessary. You'll need to install Watchman from
19 19 https://facebook.github.io/watchman/ and make sure it is in your PATH.
20 20
21 21 fsmonitor is incompatible with the largefiles and eol extensions, and
22 22 will disable itself if any of those are active.
23 23
24 24 The following configuration options exist:
25 25
26 26 ::
27 27
28 28 [fsmonitor]
29 29 mode = {off, on, paranoid}
30 30
31 31 When `mode = off`, fsmonitor will disable itself (similar to not loading the
32 32 extension at all). When `mode = on`, fsmonitor will be enabled (the default).
33 33 When `mode = paranoid`, fsmonitor will query both Watchman and the filesystem,
34 34 and ensure that the results are consistent.
35 35
36 36 ::
37 37
38 38 [fsmonitor]
39 39 timeout = (float)
40 40
41 41 A value, in seconds, that determines how long fsmonitor will wait for Watchman
42 42 to return results. Defaults to `2.0`.
43 43
44 44 ::
45 45
46 46 [fsmonitor]
47 47 blacklistusers = (list of userids)
48 48
49 49 A list of usernames for which fsmonitor will disable itself altogether.
50 50
51 51 ::
52 52
53 53 [fsmonitor]
54 54 walk_on_invalidate = (boolean)
55 55
56 56 Whether or not to walk the whole repo ourselves when our cached state has been
57 57 invalidated, for example when Watchman has been restarted or .hgignore rules
58 58 have been changed. Walking the repo in that case can result in competing for
59 59 I/O with Watchman. For large repos it is recommended to set this value to
60 60 false. You may wish to set this to true if you have a very fast filesystem
61 61 that can outpace the IPC overhead of getting the result data for the full repo
62 62 from Watchman. Defaults to false.
63 63
64 64 ::
65 65
66 66 [fsmonitor]
67 67 warn_when_unused = (boolean)
68 68
69 69 Whether to print a warning during certain operations when fsmonitor would be
70 70 beneficial to performance but isn't enabled.
71 71
72 72 ::
73 73
74 74 [fsmonitor]
75 75 warn_update_file_count = (integer)
76 76 # or when mercurial is built with rust support
77 77 warn_update_file_count_rust = (integer)
78 78
79 79 If ``warn_when_unused`` is set and fsmonitor isn't enabled, a warning will
80 80 be printed during working directory updates if this many files will be
81 81 created.
82 82 '''
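# An illustrative combined configuration covering the options documented in
# the docstring above (values are examples, not defaults):
#
#   [fsmonitor]
#   mode = on
#   timeout = 2.0
#   walk_on_invalidate = false
#   warn_when_unused = true
#   warn_update_file_count = 50000
#   blacklistusers = buildbot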
83 83
84 84 # Platforms Supported
85 85 # ===================
86 86 #
87 87 # **Linux:** *Stable*. Watchman and fsmonitor are both known to work reliably,
88 88 # even under severe loads.
89 89 #
90 90 # **Mac OS X:** *Stable*. The Mercurial test suite passes with fsmonitor
91 91 # turned on, on case-insensitive HFS+. There has been a reasonable amount of
92 92 # user testing under normal loads.
93 93 #
94 94 # **Solaris, BSD:** *Alpha*. watchman and fsmonitor are believed to work, but
95 95 # very little testing has been done.
96 96 #
97 97 # **Windows:** *Alpha*. Not in a release version of watchman or fsmonitor yet.
98 98 #
99 99 # Known Issues
100 100 # ============
101 101 #
102 102 # * fsmonitor will disable itself if any of the following extensions are
103 103 # enabled: largefiles, inotify, eol; or if the repository has subrepos.
104 104 # * fsmonitor will produce incorrect results if nested repos that are not
105 105 # subrepos exist. *Workaround*: add nested repo paths to your `.hgignore`.
106 106 #
107 107 # The issues related to nested repos and subrepos are probably not fundamental
108 108 # ones. Patches to fix them are welcome.
109 109
110 110
111 111 import codecs
112 112 import os
113 113 import stat
114 114 import sys
115 115 import tempfile
116 116 import weakref
117 117
118 118 from mercurial.i18n import _
119 119 from mercurial.node import hex
120 120
121 121 from mercurial.pycompat import open
122 122 from mercurial import (
123 123 context,
124 124 encoding,
125 125 error,
126 126 extensions,
127 127 localrepo,
128 128 merge,
129 129 pathutil,
130 130 pycompat,
131 131 registrar,
132 132 scmutil,
133 133 util,
134 134 )
135 135
136 136 # no-check-code because we're accessing private information only public in pure
137 137 from mercurial.pure import parsers
138 138 from mercurial import match as matchmod
139 139 from mercurial.utils import (
140 140 hashutil,
141 141 stringutil,
142 142 )
143 143
144 144 from . import (
145 145 pywatchman,
146 146 state,
147 147 watchmanclient,
148 148 )
149 149
150 150 # Note for extension authors: ONLY specify testedwith = 'ships-with-hg-core' for
151 151 # extensions which SHIP WITH MERCURIAL. Non-mainline extensions should
152 152 # be specifying the version(s) of Mercurial they are tested with, or
153 153 # leave the attribute unspecified.
154 154 testedwith = b'ships-with-hg-core'
155 155
156 156 configtable = {}
157 157 configitem = registrar.configitem(configtable)
158 158
159 159 configitem(
160 160 b'fsmonitor',
161 161 b'mode',
162 162 default=b'on',
163 163 )
164 164 configitem(
165 165 b'fsmonitor',
166 166 b'walk_on_invalidate',
167 167 default=False,
168 168 )
169 169 configitem(
170 170 b'fsmonitor',
171 171 b'timeout',
172 172 default=b'2',
173 173 )
174 174 configitem(
175 175 b'fsmonitor',
176 176 b'blacklistusers',
177 177 default=list,
178 178 )
179 179 configitem(
180 180 b'fsmonitor',
181 181 b'watchman_exe',
182 182 default=b'watchman',
183 183 )
184 184 configitem(
185 185 b'fsmonitor',
186 186 b'verbose',
187 187 default=True,
188 188 experimental=True,
189 189 )
190 190 configitem(
191 191 b'experimental',
192 192 b'fsmonitor.transaction_notify',
193 193 default=False,
194 194 )
195 195
196 196 # This extension is incompatible with the following blacklisted extensions
197 197 # and will disable itself when encountering one of these:
198 198 _blacklist = [b'largefiles', b'eol']
199 199
200 200
201 201 def debuginstall(ui, fm):
202 202 fm.write(
203 203 b"fsmonitor-watchman",
204 204 _(b"fsmonitor checking for watchman binary... (%s)\n"),
205 205 ui.configpath(b"fsmonitor", b"watchman_exe"),
206 206 )
207 207 root = tempfile.mkdtemp()
208 208 c = watchmanclient.client(ui, root)
209 209 err = None
210 210 try:
211 211 v = c.command(b"version")
212 212 fm.write(
213 213 b"fsmonitor-watchman-version",
214 214 _(b" watchman binary version %s\n"),
215 215 pycompat.bytestr(v["version"]),
216 216 )
217 217 except watchmanclient.Unavailable as e:
218 218 err = stringutil.forcebytestr(e)
219 219 fm.condwrite(
220 220 err,
221 221 b"fsmonitor-watchman-error",
222 222 _(b" watchman binary missing or broken: %s\n"),
223 223 err,
224 224 )
225 225 return 1 if err else 0
226 226
227 227
228 228 def _handleunavailable(ui, state, ex):
229 229 """Exception handler for Watchman interaction exceptions"""
230 230 if isinstance(ex, watchmanclient.Unavailable):
231 231 # experimental config: fsmonitor.verbose
232 232 if ex.warn and ui.configbool(b'fsmonitor', b'verbose'):
233 233 if b'illegal_fstypes' not in stringutil.forcebytestr(ex):
234 234 ui.warn(stringutil.forcebytestr(ex) + b'\n')
235 235 if ex.invalidate:
236 236 state.invalidate()
237 237 # experimental config: fsmonitor.verbose
238 238 if ui.configbool(b'fsmonitor', b'verbose'):
239 239 ui.log(
240 240 b'fsmonitor',
241 241 b'Watchman unavailable: %s\n',
242 242 stringutil.forcebytestr(ex.msg),
243 243 )
244 244 else:
245 245 ui.log(
246 246 b'fsmonitor',
247 247 b'Watchman exception: %s\n',
248 248 stringutil.forcebytestr(ex),
249 249 )
250 250
251 251
252 252 def _hashignore(ignore):
253 253 """Calculate hash for ignore patterns and filenames
254 254
255 255 If this information changes between Mercurial invocations, we can't
256 256 rely on Watchman information anymore and have to re-scan the working
257 257 copy.
258 258
259 259 """
260 260 sha1 = hashutil.sha1()
261 261 sha1.update(pycompat.byterepr(ignore))
262 262 return pycompat.sysbytes(sha1.hexdigest())
263 263
264 264
265 265 _watchmanencoding = pywatchman.encoding.get_local_encoding()
266 266 _fsencoding = sys.getfilesystemencoding() or sys.getdefaultencoding()
267 267 _fixencoding = codecs.lookup(_watchmanencoding) != codecs.lookup(_fsencoding)
268 268
269 269
270 270 def _watchmantofsencoding(path):
271 271 """Fix path to match watchman and local filesystem encoding
272 272
273 273 Watchman's path encoding can differ from the filesystem encoding. For example,
274 274 on Windows, it's always utf-8.
275 275 """
276 276 try:
277 277 decoded = path.decode(_watchmanencoding)
278 278 except UnicodeDecodeError as e:
279 279 raise error.Abort(
280 280 stringutil.forcebytestr(e), hint=b'watchman encoding error'
281 281 )
282 282
283 283 try:
284 284 encoded = decoded.encode(_fsencoding, 'strict')
285 285 except UnicodeEncodeError as e:
286 286 raise error.Abort(stringutil.forcebytestr(e))
287 287
288 288 return encoded
289 289
290 290
291 291 def overridewalk(orig, self, match, subrepos, unknown, ignored, full=True):
292 292 """Replacement for dirstate.walk, hooking into Watchman.
293 293
294 294 Whenever full is False, ignored is False, and the Watchman client is
295 295 available, use Watchman combined with saved state to possibly return only a
296 296 subset of files."""
297 297
298 298 def bail(reason):
299 299 self._ui.debug(b'fsmonitor: fallback to core status, %s\n' % reason)
300 300 return orig(match, subrepos, unknown, ignored, full=True)
301 301
302 302 if full:
303 303 return bail(b'full rewalk requested')
304 304 if ignored:
305 305 return bail(b'listing ignored files')
306 306 if not self._watchmanclient.available():
307 307 return bail(b'client unavailable')
308 308 state = self._fsmonitorstate
309 309 clock, ignorehash, notefiles = state.get()
310 310 if not clock:
311 311 if state.walk_on_invalidate:
312 312 return bail(b'no clock')
313 313 # Initial NULL clock value, see
314 314 # https://facebook.github.io/watchman/docs/clockspec.html
315 315 clock = b'c:0:0'
316 316 notefiles = []
317 317
318 318 ignore = self._ignore
319 319 dirignore = self._dirignore
320 320 if unknown:
321 321 if _hashignore(ignore) != ignorehash and clock != b'c:0:0':
322 322 # ignore list changed -- can't rely on Watchman state any more
323 323 if state.walk_on_invalidate:
324 324 return bail(b'ignore rules changed')
325 325 notefiles = []
326 326 clock = b'c:0:0'
327 327 else:
328 328 # always ignore
329 329 ignore = util.always
330 330 dirignore = util.always
331 331
332 332 matchfn = match.matchfn
333 333 matchalways = match.always()
334 334 dmap = self._map
335 if hasattr(dmap, b'_map'):
335 if hasattr(dmap, '_map'):
336 336 # for better performance, directly access the inner dirstate map if the
337 337 # standard dirstate implementation is in use.
338 338 dmap = dmap._map
339 339
340 340 has_mtime = parsers.DIRSTATE_V2_HAS_MTIME
341 341 mtime_is_ambiguous = parsers.DIRSTATE_V2_MTIME_SECOND_AMBIGUOUS
342 342 mask = has_mtime | mtime_is_ambiguous
343 343
344 344 # All entries that may not be clean
345 345 nonnormalset = {
346 346 f
347 347 for f, e in self._map.items()
348 348 if not e.maybe_clean
349 349 # same as "not has_mtime or has_ambiguous_mtime", but factored to only
350 350 # need a single access to flags for performance.
351 351 # `mask` removes all irrelevant bits, then we flip the `mtime` bit so
352 352 # its `true` value is NOT having a mtime, then check if either bit
353 353 # is set.
354 354 or bool((e.v2_data()[0] & mask) ^ has_mtime)
355 355 }
356 356
357 357 copymap = self._map.copymap
358 358 getkind = stat.S_IFMT
359 359 dirkind = stat.S_IFDIR
360 360 regkind = stat.S_IFREG
361 361 lnkkind = stat.S_IFLNK
362 362 join = self._join
363 363 normcase = util.normcase
364 364 fresh_instance = False
365 365
366 366 exact = skipstep3 = False
367 367 if match.isexact(): # match.exact
368 368 exact = True
369 369 dirignore = util.always # skip step 2
370 370 elif match.prefix(): # match.match, no patterns
371 371 skipstep3 = True
372 372
373 373 if not exact and self._checkcase:
374 374 # note that even though we could receive directory entries, we're only
375 375 # interested in checking if a file with the same name exists. So only
376 376 # normalize files if possible.
377 377 normalize = self._normalizefile
378 378 skipstep3 = False
379 379 else:
380 380 normalize = None
381 381
382 382 # step 1: find all explicit files
383 383 results, work, dirsnotfound = self._walkexplicit(match, subrepos)
384 384
385 385 skipstep3 = skipstep3 and not (work or dirsnotfound)
386 386 work = [d for d in work if not dirignore(d[0])]
387 387
388 388 if not work and (exact or skipstep3):
389 389 for s in subrepos:
390 390 del results[s]
391 391 del results[b'.hg']
392 392 return results
393 393
394 394 # step 2: query Watchman
395 395 try:
396 396 # Use the user-configured timeout for the query.
397 397 # Add a little slack over the top of the user query to allow for
398 398 # overheads while transferring the data
399 399 self._watchmanclient.settimeout(state.timeout + 0.1)
400 400 result = self._watchmanclient.command(
401 401 b'query',
402 402 {
403 403 b'fields': [b'mode', b'mtime', b'size', b'exists', b'name'],
404 404 b'since': clock,
405 405 b'expression': [
406 406 b'not',
407 407 [
408 408 b'anyof',
409 409 [b'dirname', b'.hg'],
410 410 [b'name', b'.hg', b'wholename'],
411 411 ],
412 412 ],
413 413 b'sync_timeout': int(state.timeout * 1000),
414 414 b'empty_on_fresh_instance': state.walk_on_invalidate,
415 415 },
416 416 )
417 417 except Exception as ex:
418 418 _handleunavailable(self._ui, state, ex)
419 419 self._watchmanclient.clearconnection()
420 420 return bail(b'exception during run')
421 421 else:
422 422 # We need to propagate the last observed clock up so that we
423 423 # can use it for our next query
424 424 state.setlastclock(pycompat.sysbytes(result[b'clock']))
425 425 if result[b'is_fresh_instance']:
426 426 if state.walk_on_invalidate:
427 427 state.invalidate()
428 428 return bail(b'fresh instance')
429 429 fresh_instance = True
430 430 # Ignore any prior notable files from the state info
431 431 notefiles = []
432 432
433 433 # for file paths which require normalization and we encounter a case
434 434 # collision, we store our own foldmap
435 435 if normalize:
436 436 foldmap = {normcase(k): k for k in results}
437 437
438 438 switch_slashes = pycompat.ossep == b'\\'
439 439 # The order of the results is, strictly speaking, undefined.
440 440 # For case changes on a case insensitive filesystem we may receive
441 441 # two entries, one with exists=True and another with exists=False.
442 442 # The exists=True entries in the same response should be interpreted
443 443 # as being happens-after the exists=False entries due to the way that
444 444 # Watchman tracks files. We use this property to reconcile deletes
445 445 # for name case changes.
446 446 for entry in result[b'files']:
447 447 fname = entry[b'name']
448 448
449 449 # Watchman always gives us a str. Normalize to bytes on Python 3
450 450 # using Watchman's encoding, if needed.
451 451 if not isinstance(fname, bytes):
452 452 fname = fname.encode(_watchmanencoding)
453 453
454 454 if _fixencoding:
455 455 fname = _watchmantofsencoding(fname)
456 456
457 457 if switch_slashes:
458 458 fname = fname.replace(b'\\', b'/')
459 459 if normalize:
460 460 normed = normcase(fname)
461 461 fname = normalize(fname, True, True)
462 462 foldmap[normed] = fname
463 463 fmode = entry[b'mode']
464 464 fexists = entry[b'exists']
465 465 kind = getkind(fmode)
466 466
467 467 if b'/.hg/' in fname or fname.endswith(b'/.hg'):
468 468 return bail(b'nested-repo-detected')
469 469
470 470 if not fexists:
471 471 # if marked as deleted and we don't already have a change
472 472 # record, mark it as deleted. If we already have an entry
473 473 # for fname then it was either part of walkexplicit or was
474 474 # an earlier result that was a case change
475 475 if (
476 476 fname not in results
477 477 and fname in dmap
478 478 and (matchalways or matchfn(fname))
479 479 ):
480 480 results[fname] = None
481 481 elif kind == dirkind:
482 482 if fname in dmap and (matchalways or matchfn(fname)):
483 483 results[fname] = None
484 484 elif kind == regkind or kind == lnkkind:
485 485 if fname in dmap:
486 486 if matchalways or matchfn(fname):
487 487 results[fname] = entry
488 488 elif (matchalways or matchfn(fname)) and not ignore(fname):
489 489 results[fname] = entry
490 490 elif fname in dmap and (matchalways or matchfn(fname)):
491 491 results[fname] = None
492 492
493 493 # step 3: query notable files we don't already know about
494 494 # XXX try not to iterate over the entire dmap
495 495 if normalize:
496 496 # any notable files that have changed case will already be handled
497 497 # above, so just check membership in the foldmap
498 498 notefiles = {
499 499 normalize(f, True, True)
500 500 for f in notefiles
501 501 if normcase(f) not in foldmap
502 502 }
503 503 visit = {
504 504 f
505 505 for f in notefiles
506 506 if (f not in results and matchfn(f) and (f in dmap or not ignore(f)))
507 507 }
508 508
509 509 if not fresh_instance:
510 510 if matchalways:
511 511 visit.update(f for f in nonnormalset if f not in results)
512 512 visit.update(f for f in copymap if f not in results)
513 513 else:
514 514 visit.update(
515 515 f for f in nonnormalset if f not in results and matchfn(f)
516 516 )
517 517 visit.update(f for f in copymap if f not in results and matchfn(f))
518 518 else:
519 519 if matchalways:
520 520 visit.update(f for f, st in dmap.items() if f not in results)
521 521 visit.update(f for f in copymap if f not in results)
522 522 else:
523 523 visit.update(
524 524 f for f, st in dmap.items() if f not in results and matchfn(f)
525 525 )
526 526 visit.update(f for f in copymap if f not in results and matchfn(f))
527 527
528 528 audit = pathutil.pathauditor(self._root, cached=True).check
529 529 auditpass = [f for f in visit if audit(f)]
530 530 auditpass.sort()
531 531 auditfail = visit.difference(auditpass)
532 532 for f in auditfail:
533 533 results[f] = None
534 534
535 535 nf = iter(auditpass)
536 536 for st in util.statfiles([join(f) for f in auditpass]):
537 537 f = next(nf)
538 538 if st or f in dmap:
539 539 results[f] = st
540 540
541 541 for s in subrepos:
542 542 del results[s]
543 543 del results[b'.hg']
544 544 return results
545 545
546 546
547 547 def overridestatus(
548 548 orig,
549 549 self,
550 550 node1=b'.',
551 551 node2=None,
552 552 match=None,
553 553 ignored=False,
554 554 clean=False,
555 555 unknown=False,
556 556 listsubrepos=False,
557 557 ):
558 558 listignored = ignored
559 559 listclean = clean
560 560 listunknown = unknown
561 561
562 562 def _cmpsets(l1, l2):
563 563 try:
564 564 if b'FSMONITOR_LOG_FILE' in encoding.environ:
565 565 fn = encoding.environ[b'FSMONITOR_LOG_FILE']
566 566 f = open(fn, b'wb')
567 567 else:
568 568 fn = b'fsmonitorfail.log'
569 569 f = self.vfs.open(fn, b'wb')
570 570 except (IOError, OSError):
571 571 self.ui.warn(_(b'warning: unable to write to %s\n') % fn)
572 572 return
573 573
574 574 try:
575 575 for i, (s1, s2) in enumerate(zip(l1, l2)):
576 576 if set(s1) != set(s2):
577 577 f.write(b'sets at position %d are unequal\n' % i)
578 578 f.write(b'watchman returned: %r\n' % s1)
579 579 f.write(b'stat returned: %r\n' % s2)
580 580 finally:
581 581 f.close()
582 582
583 583 if isinstance(node1, context.changectx):
584 584 ctx1 = node1
585 585 else:
586 586 ctx1 = self[node1]
587 587 if isinstance(node2, context.changectx):
588 588 ctx2 = node2
589 589 else:
590 590 ctx2 = self[node2]
591 591
592 592 working = ctx2.rev() is None
593 593 parentworking = working and ctx1 == self[b'.']
594 594 match = match or matchmod.always()
595 595
596 596 # Maybe we can use this opportunity to update Watchman's state.
597 597 # Mercurial uses workingcommitctx and/or memctx to represent the part of
598 598 # the workingctx that is to be committed. So don't update the state in
599 599 # that case.
600 600 # HG_PENDING is set in the environment when the dirstate is being updated
601 601 # in the middle of a transaction; we must not update our state in that
602 602 # case, or we risk forgetting about changes in the working copy.
603 603 updatestate = (
604 604 parentworking
605 605 and match.always()
606 606 and not isinstance(ctx2, (context.workingcommitctx, context.memctx))
607 607 and b'HG_PENDING' not in encoding.environ
608 608 )
609 609
610 610 try:
611 611 if self._fsmonitorstate.walk_on_invalidate:
612 612 # Use a short timeout to query the current clock. If that
613 613 # takes too long then we assume that the service will be slow
614 614 # to answer our query.
615 615 # walk_on_invalidate indicates that we prefer to walk the
616 616 # tree ourselves because we can ignore portions that Watchman
617 617 # cannot and we tend to be faster in the warmer buffer cache
618 618 # cases.
619 619 self._watchmanclient.settimeout(0.1)
620 620 else:
621 621 # Give Watchman more time to potentially complete its walk
622 622 # and return the initial clock. In this mode we assume that
623 623 # the filesystem will be slower than parsing a potentially
624 624 # very large Watchman result set.
625 625 self._watchmanclient.settimeout(self._fsmonitorstate.timeout + 0.1)
626 626 startclock = self._watchmanclient.getcurrentclock()
627 627 except Exception as ex:
628 628 self._watchmanclient.clearconnection()
629 629 _handleunavailable(self.ui, self._fsmonitorstate, ex)
630 630 # boo, Watchman failed. bail
631 631 return orig(
632 632 node1,
633 633 node2,
634 634 match,
635 635 listignored,
636 636 listclean,
637 637 listunknown,
638 638 listsubrepos,
639 639 )
640 640
641 641 if updatestate:
642 642 # We need info about unknown files. This may make things slower the
643 643 # first time, but whatever.
644 644 stateunknown = True
645 645 else:
646 646 stateunknown = listunknown
647 647
648 648 if updatestate:
649 649 ps = poststatus(startclock)
650 650 self.addpostdsstatus(ps)
651 651
652 652 r = orig(
653 653 node1, node2, match, listignored, listclean, stateunknown, listsubrepos
654 654 )
655 655 modified, added, removed, deleted, unknown, ignored, clean = r
656 656
657 657 if not listunknown:
658 658 unknown = []
659 659
660 660 # don't do paranoid checks if we're not going to query Watchman anyway
661 661 full = listclean or match.traversedir is not None
662 662 if self._fsmonitorstate.mode == b'paranoid' and not full:
663 663 # run status again and fall back to the old walk this time
664 664 self.dirstate._fsmonitordisable = True
665 665
666 666 # shut the UI up
667 667 quiet = self.ui.quiet
668 668 self.ui.quiet = True
669 669 fout, ferr = self.ui.fout, self.ui.ferr
670 670 self.ui.fout = self.ui.ferr = open(os.devnull, b'wb')
671 671
672 672 try:
673 673 rv2 = orig(
674 674 node1,
675 675 node2,
676 676 match,
677 677 listignored,
678 678 listclean,
679 679 listunknown,
680 680 listsubrepos,
681 681 )
682 682 finally:
683 683 self.dirstate._fsmonitordisable = False
684 684 self.ui.quiet = quiet
685 685 self.ui.fout, self.ui.ferr = fout, ferr
686 686
687 687 # clean isn't tested since it's set to True above
688 688 with self.wlock():
689 689 _cmpsets(
690 690 [modified, added, removed, deleted, unknown, ignored, clean],
691 691 rv2,
692 692 )
693 693 modified, added, removed, deleted, unknown, ignored, clean = rv2
694 694
695 695 return scmutil.status(
696 696 modified, added, removed, deleted, unknown, ignored, clean
697 697 )
698 698
699 699
700 700 class poststatus:
701 701 def __init__(self, startclock):
702 702 self._startclock = pycompat.sysbytes(startclock)
703 703
704 704 def __call__(self, wctx, status):
705 705 clock = wctx.repo()._fsmonitorstate.getlastclock() or self._startclock
706 706 hashignore = _hashignore(wctx.repo().dirstate._ignore)
707 707 notefiles = (
708 708 status.modified
709 709 + status.added
710 710 + status.removed
711 711 + status.deleted
712 712 + status.unknown
713 713 )
714 714 wctx.repo()._fsmonitorstate.set(clock, hashignore, notefiles)
715 715
716 716
717 717 def makedirstate(repo, dirstate):
718 718 class fsmonitordirstate(dirstate.__class__):
719 719 def _fsmonitorinit(self, repo):
720 720 # _fsmonitordisable is used in paranoid mode
721 721 self._fsmonitordisable = False
722 722 self._fsmonitorstate = repo._fsmonitorstate
723 723 self._watchmanclient = repo._watchmanclient
724 724 self._repo = weakref.proxy(repo)
725 725
726 726 def walk(self, *args, **kwargs):
727 727 orig = super(fsmonitordirstate, self).walk
728 728 if self._fsmonitordisable:
729 729 return orig(*args, **kwargs)
730 730 return overridewalk(orig, self, *args, **kwargs)
731 731
732 732 def rebuild(self, *args, **kwargs):
733 733 self._fsmonitorstate.invalidate()
734 734 return super(fsmonitordirstate, self).rebuild(*args, **kwargs)
735 735
736 736 def invalidate(self, *args, **kwargs):
737 737 self._fsmonitorstate.invalidate()
738 738 return super(fsmonitordirstate, self).invalidate(*args, **kwargs)
739 739
740 740 dirstate.__class__ = fsmonitordirstate
741 741 dirstate._fsmonitorinit(repo)
742 742
743 743
744 744 def wrapdirstate(orig, self):
745 745 ds = orig(self)
746 746 # only override the dirstate when Watchman is available for the repo
747 if hasattr(self, b'_fsmonitorstate'):
747 if hasattr(self, '_fsmonitorstate'):
748 748 makedirstate(self, ds)
749 749 return ds
750 750
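The hunk above swaps the bytes literal passed to hasattr() for a plain str. A minimal, self-contained illustration of why that matters on CPython 3 (the class and attribute below are made up for the example, not Mercurial code):

    class Repo:
        _fsmonitorstate = object()

    repo = Repo()
    print(hasattr(repo, '_fsmonitorstate'))    # True: str attribute names work

    try:
        hasattr(repo, b'_fsmonitorstate')      # bytes attribute name
    except TypeError as exc:
        # CPython 3 rejects non-str attribute names outright
        print('TypeError:', exc)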
751 751
752 752 def extsetup(ui):
753 753 extensions.wrapfilecache(
754 754 localrepo.localrepository, b'dirstate', wrapdirstate
755 755 )
756 756 if pycompat.isdarwin:
757 757 # An assist for avoiding the dangling-symlink fsevents bug
758 758 extensions.wrapfunction(os, 'symlink', wrapsymlink)
759 759
760 760 extensions.wrapfunction(merge, '_update', wrapupdate)
761 761
762 762
763 763 def wrapsymlink(orig, source, link_name):
764 764 """if we create a dangling symlink, also touch the parent dir
765 765 to encourage fsevents notifications to work more correctly"""
766 766 try:
767 767 return orig(source, link_name)
768 768 finally:
769 769 try:
770 770 os.utime(os.path.dirname(link_name), None)
771 771 except OSError:
772 772 pass
773 773
774 774
775 775 class state_update:
776 776 """This context manager is responsible for dispatching the state-enter
777 777 and state-leave signals to the watchman service. The enter and leave
778 778 methods can be invoked manually (for scenarios where context manager
779 779 semantics are not possible). If parameters oldnode and newnode are None,
780 780 they will be populated based on the current working copy in enter and
781 781 leave, respectively. Similarly, if the distance is None, it will be
782 782 calculated based on the oldnode and newnode in the leave method."""
783 783
784 784 def __init__(
785 785 self,
786 786 repo,
787 787 name,
788 788 oldnode=None,
789 789 newnode=None,
790 790 distance=None,
791 791 partial=False,
792 792 ):
793 793 self.repo = repo.unfiltered()
794 794 self.name = name
795 795 self.oldnode = oldnode
796 796 self.newnode = newnode
797 797 self.distance = distance
798 798 self.partial = partial
799 799 self._lock = None
800 800 self.need_leave = False
801 801
802 802 def __enter__(self):
803 803 self.enter()
804 804
805 805 def enter(self):
806 806 # Make sure we have a wlock prior to sending notifications to watchman.
807 807 # We don't want to race with other actors. In the update case,
808 808 # merge.update is going to take the wlock almost immediately. We are
809 809 # effectively extending the lock around several short sanity checks.
810 810 if self.oldnode is None:
811 811 self.oldnode = self.repo[b'.'].node()
812 812
813 813 if self.repo.currentwlock() is None:
814 if hasattr(self.repo, b'wlocknostateupdate'):
814 if hasattr(self.repo, 'wlocknostateupdate'):
815 815 self._lock = self.repo.wlocknostateupdate()
816 816 else:
817 817 self._lock = self.repo.wlock()
818 818 self.need_leave = self._state(b'state-enter', hex(self.oldnode))
819 819 return self
820 820
821 821 def __exit__(self, type_, value, tb):
822 822 abort = True if type_ else False
823 823 self.exit(abort=abort)
824 824
825 825 def exit(self, abort=False):
826 826 try:
827 827 if self.need_leave:
828 828 status = b'failed' if abort else b'ok'
829 829 if self.newnode is None:
830 830 self.newnode = self.repo[b'.'].node()
831 831 if self.distance is None:
832 832 self.distance = calcdistance(
833 833 self.repo, self.oldnode, self.newnode
834 834 )
835 835 self._state(b'state-leave', hex(self.newnode), status=status)
836 836 finally:
837 837 self.need_leave = False
838 838 if self._lock:
839 839 self._lock.release()
840 840
841 841 def _state(self, cmd, commithash, status=b'ok'):
842 if not hasattr(self.repo, b'_watchmanclient'):
842 if not hasattr(self.repo, '_watchmanclient'):
843 843 return False
844 844 try:
845 845 self.repo._watchmanclient.command(
846 846 cmd,
847 847 {
848 848 b'name': self.name,
849 849 b'metadata': {
850 850 # the target revision
851 851 b'rev': commithash,
852 852 # approximate number of commits between current and target
853 853 b'distance': self.distance if self.distance else 0,
854 854 # success/failure (only really meaningful for state-leave)
855 855 b'status': status,
856 856 # whether the working copy parent is changing
857 857 b'partial': self.partial,
858 858 },
859 859 },
860 860 )
861 861 return True
862 862 except Exception as e:
863 863 # Swallow any errors; fire and forget
864 864 self.repo.ui.log(
865 865 b'watchman', b'Exception %s while running %s\n', e, cmd
866 866 )
867 867 return False
868 868
869 869
870 870 # Estimate the distance between two nodes
871 871 def calcdistance(repo, oldnode, newnode):
872 872 anc = repo.changelog.ancestor(oldnode, newnode)
873 873 ancrev = repo[anc].rev()
874 874 distance = abs(repo[oldnode].rev() - ancrev) + abs(
875 875 repo[newnode].rev() - ancrev
876 876 )
877 877 return distance
878 878
879 879
880 880 # Bracket working copy updates with calls to the watchman state-enter
881 881 # and state-leave commands. This allows clients to perform more intelligent
882 882 # settling during bulk file change scenarios
883 883 # https://facebook.github.io/watchman/docs/cmd/subscribe.html#advanced-settling
884 884 def wrapupdate(
885 885 orig,
886 886 repo,
887 887 node,
888 888 branchmerge,
889 889 force,
890 890 ancestor=None,
891 891 mergeancestor=False,
892 892 labels=None,
893 893 matcher=None,
894 894 **kwargs
895 895 ):
896 896
897 897 distance = 0
898 898 partial = True
899 899 oldnode = repo[b'.'].node()
900 900 newnode = repo[node].node()
901 901 if matcher is None or matcher.always():
902 902 partial = False
903 903 distance = calcdistance(repo.unfiltered(), oldnode, newnode)
904 904
905 905 with state_update(
906 906 repo,
907 907 name=b"hg.update",
908 908 oldnode=oldnode,
909 909 newnode=newnode,
910 910 distance=distance,
911 911 partial=partial,
912 912 ):
913 913 return orig(
914 914 repo,
915 915 node,
916 916 branchmerge,
917 917 force,
918 918 ancestor,
919 919 mergeancestor,
920 920 labels,
921 921 matcher,
922 922 **kwargs
923 923 )
924 924
925 925
926 926 def repo_has_depth_one_nested_repo(repo):
927 927 for f in repo.wvfs.listdir():
928 928 if os.path.isdir(os.path.join(repo.root, f, b'.hg')):
929 929 msg = b'fsmonitor: sub-repository %r detected, fsmonitor disabled\n'
930 930 repo.ui.debug(msg % f)
931 931 return True
932 932 return False
933 933
934 934
935 935 def reposetup(ui, repo):
936 936 # We don't work with largefiles or inotify
937 937 exts = extensions.enabled()
938 938 for ext in _blacklist:
939 939 if ext in exts:
940 940 ui.warn(
941 941 _(
942 942 b'The fsmonitor extension is incompatible with the %s '
943 943 b'extension and has been disabled.\n'
944 944 )
945 945 % ext
946 946 )
947 947 return
948 948
949 949 if repo.local():
950 950 # We don't work with subrepos either.
951 951 #
952 952 # Accessing repo[None].substate can cause a dirstate parse, which is
953 953 # too slow. Instead, look for the .hgsubstate and .hgsub files directly,
954 954 if repo.wvfs.exists(b'.hgsubstate') or repo.wvfs.exists(b'.hgsub'):
955 955 return
956 956
957 957 if repo_has_depth_one_nested_repo(repo):
958 958 return
959 959
960 960 fsmonitorstate = state.state(repo)
961 961 if fsmonitorstate.mode == b'off':
962 962 return
963 963
964 964 try:
965 965 client = watchmanclient.client(repo.ui, repo.root)
966 966 except Exception as ex:
967 967 _handleunavailable(ui, fsmonitorstate, ex)
968 968 return
969 969
970 970 repo._fsmonitorstate = fsmonitorstate
971 971 repo._watchmanclient = client
972 972
973 973 dirstate, cached = localrepo.isfilecached(repo, b'dirstate')
974 974 if cached:
975 975 # at this point since fsmonitorstate wasn't present,
976 976 # repo.dirstate is not a fsmonitordirstate
977 977 makedirstate(repo, dirstate)
978 978
979 979 class fsmonitorrepo(repo.__class__):
980 980 def status(self, *args, **kwargs):
981 981 orig = super(fsmonitorrepo, self).status
982 982 return overridestatus(orig, self, *args, **kwargs)
983 983
984 984 def wlocknostateupdate(self, *args, **kwargs):
985 985 return super(fsmonitorrepo, self).wlock(*args, **kwargs)
986 986
987 987 def wlock(self, *args, **kwargs):
988 988 l = super(fsmonitorrepo, self).wlock(*args, **kwargs)
989 989 if not ui.configbool(
990 990 b"experimental", b"fsmonitor.transaction_notify"
991 991 ):
992 992 return l
993 993 if l.held != 1:
994 994 return l
995 995 origrelease = l.releasefn
996 996
997 997 def staterelease():
998 998 if origrelease:
999 999 origrelease()
1000 1000 if l.stateupdate:
1001 1001 l.stateupdate.exit()
1002 1002 l.stateupdate = None
1003 1003
1004 1004 try:
1005 1005 l.stateupdate = None
1006 1006 l.stateupdate = state_update(self, name=b"hg.transaction")
1007 1007 l.stateupdate.enter()
1008 1008 l.releasefn = staterelease
1009 1009 except Exception as e:
1010 1010 # Swallow any errors; fire and forget
1011 1011 self.ui.log(
1012 1012 b'watchman', b'Exception in state update %s\n', e
1013 1013 )
1014 1014 return l
1015 1015
1016 1016 repo.__class__ = fsmonitorrepo
@@ -1,449 +1,449 b''
1 1 import os
2 2 import shutil
3 3 import stat
4 4 import time
5 5
6 6 from mercurial.i18n import _
7 7 from mercurial.node import bin, hex
8 8 from mercurial.pycompat import open
9 9 from mercurial import (
10 10 error,
11 11 pycompat,
12 12 util,
13 13 )
14 14 from mercurial.utils import hashutil
15 15 from . import (
16 16 constants,
17 17 shallowutil,
18 18 )
19 19
20 20
21 21 class basestore:
22 22 def __init__(self, repo, path, reponame, shared=False):
23 23 """Creates a remotefilelog store object for the given repo name.
24 24
25 25 `path` - The file path where this store keeps its data
26 26 `reponame` - The name of the repo. This is used to partition data from
27 27 many repos.
28 28 `shared` - True if this store is a shared cache of data from the central
29 29 server, for many repos on this machine. False means this store is for
30 30 the local data for one repo.
31 31 """
32 32 self.repo = repo
33 33 self.ui = repo.ui
34 34 self._path = path
35 35 self._reponame = reponame
36 36 self._shared = shared
37 37 self._uid = os.getuid() if not pycompat.iswindows else None
38 38
39 39 self._validatecachelog = self.ui.config(
40 40 b"remotefilelog", b"validatecachelog"
41 41 )
42 42 self._validatecache = self.ui.config(
43 43 b"remotefilelog", b"validatecache", b'on'
44 44 )
45 45 if self._validatecache not in (b'on', b'strict', b'off'):
46 46 self._validatecache = b'on'
47 47 if self._validatecache == b'off':
48 48 self._validatecache = False
49 49
50 50 if shared:
51 51 shallowutil.mkstickygroupdir(self.ui, path)
52 52
53 53 def getmissing(self, keys):
54 54 missing = []
55 55 for name, node in keys:
56 56 filepath = self._getfilepath(name, node)
57 57 exists = os.path.exists(filepath)
58 58 if (
59 59 exists
60 60 and self._validatecache == b'strict'
61 61 and not self._validatekey(filepath, b'contains')
62 62 ):
63 63 exists = False
64 64 if not exists:
65 65 missing.append((name, node))
66 66
67 67 return missing
68 68
69 69 # BELOW THIS ARE IMPLEMENTATIONS OF REPACK SOURCE
70 70
71 71 def markledger(self, ledger, options=None):
72 72 if options and options.get(constants.OPTION_PACKSONLY):
73 73 return
74 74 if self._shared:
75 75 for filename, nodes in self._getfiles():
76 76 for node in nodes:
77 77 ledger.markdataentry(self, filename, node)
78 78 ledger.markhistoryentry(self, filename, node)
79 79
80 80 def cleanup(self, ledger):
81 81 ui = self.ui
82 82 entries = ledger.sources.get(self, [])
83 83 count = 0
84 84 progress = ui.makeprogress(
85 85 _(b"cleaning up"), unit=b"files", total=len(entries)
86 86 )
87 87 for entry in entries:
88 88 if entry.gced or (entry.datarepacked and entry.historyrepacked):
89 89 progress.update(count)
90 90 path = self._getfilepath(entry.filename, entry.node)
91 91 util.tryunlink(path)
92 92 count += 1
93 93 progress.complete()
94 94
95 95 # Clean up the repo cache directory.
96 96 self._cleanupdirectory(self._getrepocachepath())
97 97
98 98 # BELOW THIS ARE NON-STANDARD APIS
99 99
100 100 def _cleanupdirectory(self, rootdir):
101 101 """Removes the empty directories and unnecessary files within the root
102 102 directory recursively. Note that this method does not remove the root
103 103 directory itself."""
104 104
105 105 oldfiles = set()
106 106 otherfiles = set()
107 107 # osutil.listdir returns stat information which saves some rmdir/listdir
108 108 # syscalls.
109 109 for name, mode in util.osutil.listdir(rootdir):
110 110 if stat.S_ISDIR(mode):
111 111 dirpath = os.path.join(rootdir, name)
112 112 self._cleanupdirectory(dirpath)
113 113
114 114 # Now that the directory specified by dirpath is potentially
115 115 # empty, try and remove it.
116 116 try:
117 117 os.rmdir(dirpath)
118 118 except OSError:
119 119 pass
120 120
121 121 elif stat.S_ISREG(mode):
122 122 if name.endswith(b'_old'):
123 123 oldfiles.add(name[:-4])
124 124 else:
125 125 otherfiles.add(name)
126 126
127 127 # Remove the files which end with suffix '_old' and have no
128 128 # corresponding file without the suffix '_old'. See addremotefilelognode
129 129 # method for the generation/purpose of files with '_old' suffix.
130 130 for filename in oldfiles - otherfiles:
131 131 filepath = os.path.join(rootdir, filename + b'_old')
132 132 util.tryunlink(filepath)
133 133
134 134 def _getfiles(self):
135 135 """Return a list of (filename, [node,...]) for all the revisions that
136 136 exist in the store.
137 137
138 138 This is useful for obtaining a list of all the contents of the store
139 139 when performing a repack to another store, since the store API requires
140 140 name+node keys and not namehash+node keys.
141 141 """
142 142 existing = {}
143 143 for filenamehash, node in self._listkeys():
144 144 existing.setdefault(filenamehash, []).append(node)
145 145
146 146 filenamemap = self._resolvefilenames(existing.keys())
147 147
148 148 for filename, sha in filenamemap.items():
149 149 yield (filename, existing[sha])
150 150
151 151 def _resolvefilenames(self, hashes):
152 152 """Given a list of filename hashes that are present in the
153 153 remotefilelog store, return a mapping from filename->hash.
154 154
155 155 This is useful when converting remotefilelog blobs into other storage
156 156 formats.
157 157 """
158 158 if not hashes:
159 159 return {}
160 160
161 161 filenames = {}
162 162 missingfilename = set(hashes)
163 163
164 164 # Start with a full manifest, since it'll cover the majority of files
165 165 for filename in self.repo[b'tip'].manifest():
166 166 sha = hashutil.sha1(filename).digest()
167 167 if sha in missingfilename:
168 168 filenames[filename] = sha
169 169 missingfilename.discard(sha)
170 170
171 171 # Scan the changelog until we've found every file name
172 172 cl = self.repo.unfiltered().changelog
173 173 for rev in range(len(cl) - 1, -1, -1):
174 174 if not missingfilename:
175 175 break
176 176 files = cl.readfiles(cl.node(rev))
177 177 for filename in files:
178 178 sha = hashutil.sha1(filename).digest()
179 179 if sha in missingfilename:
180 180 filenames[filename] = sha
181 181 missingfilename.discard(sha)
182 182
183 183 return filenames
184 184
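The scan above boils down to hashing every candidate file name and matching it against the wanted set, stopping as soon as everything is resolved. A hedged sketch of that pattern with made-up names, using hashlib in place of Mercurial's hashutil:

    import hashlib

    wanted = {hashlib.sha1(b'src/a.py').digest(), hashlib.sha1(b'docs/b.txt').digest()}
    candidates = [b'src/a.py', b'README', b'docs/b.txt']  # stands in for manifest/changelog entries

    filenames = {}
    missing = set(wanted)
    for name in candidates:
        sha = hashlib.sha1(name).digest()
        if sha in missing:
            filenames[name] = sha
            missing.discard(sha)
        if not missing:
            break                    # stop early, as the changelog loop does
    print(sorted(filenames))         # [b'docs/b.txt', b'src/a.py']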
185 185 def _getrepocachepath(self):
186 186 return (
187 187 os.path.join(self._path, self._reponame)
188 188 if self._shared
189 189 else self._path
190 190 )
191 191
192 192 def _listkeys(self):
193 193 """List all the remotefilelog keys that exist in the store.
194 194
195 195 Returns an iterator of (filename hash, filecontent hash) tuples.
196 196 """
197 197
198 198 for root, dirs, files in os.walk(self._getrepocachepath()):
199 199 for filename in files:
200 200 if len(filename) != 40:
201 201 continue
202 202 node = filename
203 203 if self._shared:
204 204 # .../1a/85ffda..be21
205 205 filenamehash = root[-41:-39] + root[-38:]
206 206 else:
207 207 filenamehash = root[-40:]
208 208 yield (bin(filenamehash), bin(node))
209 209
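A hedged illustration of the shared-cache layout that the slicing above relies on (all hashes below are made up; real keys come from shallowutil.getcachekey):

    import os

    filenamehash = '1a' + 'b' * 38           # 40 hex digits: sha1 of the file name
    node = 'c' * 40                           # 40 hex digits: file revision node

    # shared layout: <cache>/<repo>/<hash[:2]>/<hash[2:]>/<node>
    root = os.path.join('/cache', 'myrepo', filenamehash[:2], filenamehash[2:])

    # Recover the file name hash from the directory part, as _listkeys does:
    # the last 41 characters of root are '<2 hex>/<38 hex>'.
    assert root[-41:-39] + root[-38:] == filenamehash
    print(os.path.join(root, node))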
210 210 def _getfilepath(self, name, node):
211 211 node = hex(node)
212 212 if self._shared:
213 213 key = shallowutil.getcachekey(self._reponame, name, node)
214 214 else:
215 215 key = shallowutil.getlocalkey(name, node)
216 216
217 217 return os.path.join(self._path, key)
218 218
219 219 def _getdata(self, name, node):
220 220 filepath = self._getfilepath(name, node)
221 221 try:
222 222 data = shallowutil.readfile(filepath)
223 223 if self._validatecache and not self._validatedata(data, filepath):
224 224 if self._validatecachelog:
225 225 with open(self._validatecachelog, b'ab+') as f:
226 226 f.write(b"corrupt %s during read\n" % filepath)
227 227 os.rename(filepath, filepath + b".corrupt")
228 228 raise KeyError(b"corrupt local cache file %s" % filepath)
229 229 except IOError:
230 230 raise KeyError(
231 231 b"no file found at %s for %s:%s" % (filepath, name, hex(node))
232 232 )
233 233
234 234 return data
235 235
236 236 def addremotefilelognode(self, name, node, data):
237 237 filepath = self._getfilepath(name, node)
238 238
239 239 oldumask = os.umask(0o002)
240 240 try:
241 241 # if this node already exists, save the old version for
242 242 # recovery/debugging purposes.
243 243 if os.path.exists(filepath):
244 244 newfilename = filepath + b'_old'
245 245 # newfilename can be read-only and shutil.copy will fail.
246 246 # Delete newfilename to avoid it
247 247 if os.path.exists(newfilename):
248 248 shallowutil.unlinkfile(newfilename)
249 249 shutil.copy(filepath, newfilename)
250 250
251 251 shallowutil.mkstickygroupdir(self.ui, os.path.dirname(filepath))
252 252 shallowutil.writefile(filepath, data, readonly=True)
253 253
254 254 if self._validatecache:
255 255 if not self._validatekey(filepath, b'write'):
256 256 raise error.Abort(
257 257 _(b"local cache write was corrupted %s") % filepath
258 258 )
259 259 finally:
260 260 os.umask(oldumask)
261 261
262 262 def markrepo(self, path):
263 263 """Call this to add the given repo path to the store's list of
264 264 repositories that are using it. This is useful later when doing garbage
265 265 collection, since it allows us to inspect the repos to see what nodes
266 266 they want to be kept alive in the store.
267 267 """
268 268 repospath = os.path.join(self._path, b"repos")
269 269 with open(repospath, b'ab') as reposfile:
270 270 reposfile.write(os.path.dirname(path) + b"\n")
271 271
272 272 repospathstat = os.stat(repospath)
273 273 if repospathstat.st_uid == self._uid:
274 274 os.chmod(repospath, 0o0664)
275 275
276 276 def _validatekey(self, path, action):
277 277 with open(path, b'rb') as f:
278 278 data = f.read()
279 279
280 280 if self._validatedata(data, path):
281 281 return True
282 282
283 283 if self._validatecachelog:
284 284 with open(self._validatecachelog, b'ab+') as f:
285 285 f.write(b"corrupt %s during %s\n" % (path, action))
286 286
287 287 os.rename(path, path + b".corrupt")
288 288 return False
289 289
290 290 def _validatedata(self, data, path):
291 291 try:
292 292 if len(data) > 0:
293 293 # see remotefilelogserver.createfileblob for the format
294 294 offset, size, flags = shallowutil.parsesizeflags(data)
295 295 if len(data) <= size:
296 296 # it is truncated
297 297 return False
298 298
299 299 # extract the node from the metadata
300 300 offset += size
301 301 datanode = data[offset : offset + 20]
302 302
303 303 # and compare against the path
304 304 if os.path.basename(path) == hex(datanode):
305 305 # Content matches the intended path
306 306 return True
307 307 return False
308 308 except (ValueError, shallowutil.BadRemotefilelogHeader):
309 309 pass
310 310
311 311 return False
312 312
313 313 def gc(self, keepkeys):
314 314 ui = self.ui
315 315 cachepath = self._path
316 316
317 317 # prune cache
318 318 queue = pycompat.queue.PriorityQueue()
319 319 originalsize = 0
320 320 size = 0
321 321 count = 0
322 322 removed = 0
323 323
324 324 # keep files newer than a day even if they aren't needed
325 325 limit = time.time() - (60 * 60 * 24)
326 326
327 327 progress = ui.makeprogress(
328 328 _(b"removing unnecessary files"), unit=b"files"
329 329 )
330 330 progress.update(0)
331 331 for root, dirs, files in os.walk(cachepath):
332 332 for file in files:
333 333 if file == b'repos':
334 334 continue
335 335
336 336 # Don't delete pack files
337 337 if b'/packs/' in root:
338 338 continue
339 339
340 340 progress.update(count)
341 341 path = os.path.join(root, file)
342 342 key = os.path.relpath(path, cachepath)
343 343 count += 1
344 344 try:
345 345 pathstat = os.stat(path)
346 346 except FileNotFoundError:
347 347 msg = _(
348 348 b"warning: file %s was removed by another process\n"
349 349 )
350 350 ui.warn(msg % path)
351 351 continue
352 352
353 353 originalsize += pathstat.st_size
354 354
355 355 if key in keepkeys or pathstat.st_atime > limit:
356 356 queue.put((pathstat.st_atime, path, pathstat))
357 357 size += pathstat.st_size
358 358 else:
359 359 try:
360 360 shallowutil.unlinkfile(path)
361 361 except FileNotFoundError:
362 362 msg = _(
363 363 b"warning: file %s was removed by another "
364 364 b"process\n"
365 365 )
366 366 ui.warn(msg % path)
367 367 continue
368 368 removed += 1
369 369 progress.complete()
370 370
371 371 # remove oldest files until under limit
372 372 limit = ui.configbytes(b"remotefilelog", b"cachelimit")
373 373 if size > limit:
374 374 excess = size - limit
375 375 progress = ui.makeprogress(
376 376 _(b"enforcing cache limit"), unit=b"bytes", total=excess
377 377 )
378 378 removedexcess = 0
379 379 while queue and size > limit and size > 0:
380 380 progress.update(removedexcess)
381 381 atime, oldpath, oldpathstat = queue.get()
382 382 try:
383 383 shallowutil.unlinkfile(oldpath)
384 384 except FileNotFoundError:
385 385 msg = _(
386 386 b"warning: file %s was removed by another process\n"
387 387 )
388 388 ui.warn(msg % oldpath)
389 389 size -= oldpathstat.st_size
390 390 removed += 1
391 391 removedexcess += oldpathstat.st_size
392 392 progress.complete()
393 393
394 394 ui.status(
395 395 _(b"finished: removed %d of %d files (%0.2f GB to %0.2f GB)\n")
396 396 % (
397 397 removed,
398 398 count,
399 399 float(originalsize) / 1024.0 / 1024.0 / 1024.0,
400 400 float(size) / 1024.0 / 1024.0 / 1024.0,
401 401 )
402 402 )
403 403
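The size-limit pass above is an access-time-ordered eviction: the least recently read entries are removed until the cache fits the configured limit. A hedged sketch with made-up (atime, path, size) records, using a plain heap where the real code feeds a PriorityQueue from os.stat() results:

    import heapq

    entries = [(100, '/cache/a', 300), (250, '/cache/b', 500), (175, '/cache/c', 400)]
    limit = 700                        # stand-in for remotefilelog.cachelimit

    size = sum(sz for _, _, sz in entries)
    heapq.heapify(entries)             # oldest access time pops first
    removed = []
    while entries and size > limit:
        atime, path, sz = heapq.heappop(entries)
        removed.append(path)           # the real code unlinks the file here
        size -= sz
    print(removed, size)               # ['/cache/a', '/cache/c'] 500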
404 404
405 405 class baseunionstore:
406 406 def __init__(self, *args, **kwargs):
407 407 # If one of the functions that iterates all of the stores is about to
408 408 # throw a KeyError, try this many times with a full refresh between
409 409 # attempts. A repack operation may have moved data from one store to
410 410 # another while we were running.
411 411 self.numattempts = kwargs.get('numretries', 0) + 1
412 412 # If not-None, call this function on every retry and if the attempts are
413 413 # exhausted.
414 414 self.retrylog = kwargs.get('retrylog', None)
415 415
416 416 def markforrefresh(self):
417 417 for store in self.stores:
418 if hasattr(store, b'markforrefresh'):
418 if hasattr(store, 'markforrefresh'):
419 419 store.markforrefresh()
420 420
421 421 @staticmethod
422 422 def retriable(fn):
423 423 def noop(*args):
424 424 pass
425 425
426 426 def wrapped(self, *args, **kwargs):
427 427 retrylog = self.retrylog or noop
428 428 funcname = fn.__name__
429 429 i = 0
430 430 while i < self.numattempts:
431 431 if i > 0:
432 432 retrylog(
433 433 b're-attempting (n=%d) %s\n'
434 434 % (i, pycompat.sysbytes(funcname))
435 435 )
436 436 self.markforrefresh()
437 437 i += 1
438 438 try:
439 439 return fn(self, *args, **kwargs)
440 440 except KeyError:
441 441 if i == self.numattempts:
442 442 # retries exhausted
443 443 retrylog(
444 444 b'retries exhausted in %s, raising KeyError\n'
445 445 % pycompat.sysbytes(funcname)
446 446 )
447 447 raise
448 448
449 449 return wrapped
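A self-contained sketch of the retry-with-refresh pattern implemented by retriable above (the decorator, store, and lookup below are made up; the real refresh re-scans the union store's members after a repack may have moved data):

    import functools

    def retriable(numattempts, refresh):
        """Retry a KeyError-raising lookup, refreshing state between attempts."""
        def decorator(fn):
            @functools.wraps(fn)
            def wrapped(*args, **kwargs):
                for attempt in range(numattempts):
                    if attempt:
                        refresh()              # e.g. re-scan stores after a repack
                    try:
                        return fn(*args, **kwargs)
                    except KeyError:
                        if attempt == numattempts - 1:
                            raise              # retries exhausted
            return wrapped
        return decorator

    store = {}

    @retriable(numattempts=3, refresh=lambda: store.update(x=1))
    def lookup(key):
        return store[key]

    print(lookup('x'))   # first attempt misses, refresh adds 'x', retry succeeds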
@@ -1,670 +1,670 b''
1 1 # fileserverclient.py - client for communicating with the cache process
2 2 #
3 3 # Copyright 2013 Facebook, Inc.
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8
9 9 import io
10 10 import os
11 11 import threading
12 12 import time
13 13 import zlib
14 14
15 15 from mercurial.i18n import _
16 16 from mercurial.node import bin, hex
17 17 from mercurial import (
18 18 error,
19 19 pycompat,
20 20 revlog,
21 21 sshpeer,
22 22 util,
23 23 wireprotov1peer,
24 24 )
25 25 from mercurial.utils import (
26 26 hashutil,
27 27 procutil,
28 28 )
29 29
30 30 from . import (
31 31 constants,
32 32 contentstore,
33 33 metadatastore,
34 34 )
35 35
36 36 _sshv1peer = sshpeer.sshv1peer
37 37
38 38 # Statistics for debugging
39 39 fetchcost = 0
40 40 fetches = 0
41 41 fetched = 0
42 42 fetchmisses = 0
43 43
44 44 _lfsmod = None
45 45
46 46
47 47 def getcachekey(reponame, file, id):
48 48 pathhash = hex(hashutil.sha1(file).digest())
49 49 return os.path.join(reponame, pathhash[:2], pathhash[2:], id)
50 50
51 51
52 52 def getlocalkey(file, id):
53 53 pathhash = hex(hashutil.sha1(file).digest())
54 54 return os.path.join(pathhash, id)
55 55
56 56
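A hedged example of the resulting cache key, using hashlib in place of Mercurial's hashutil and a made-up repo name, file, and node; the two leading hex digits of the path hash become their own directory level, a common fan-out scheme that avoids piling every blob into one directory:

    import hashlib
    import os

    reponame, file, id = b'myrepo', b'src/main.py', b'a' * 40   # node id is made up
    pathhash = hashlib.sha1(file).hexdigest().encode('ascii')
    print(os.path.join(reponame, pathhash[:2], pathhash[2:], id))
    # e.g. myrepo/<2 hex digits>/<38 hex digits>/aaaa...aaaa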
57 57 def peersetup(ui, peer):
58 58 class remotefilepeer(peer.__class__):
59 59 @wireprotov1peer.batchable
60 60 def x_rfl_getfile(self, file, node):
61 61 if not self.capable(b'x_rfl_getfile'):
62 62 raise error.Abort(
63 63 b'configured remotefile server does not support getfile'
64 64 )
65 65
66 66 def decode(d):
67 67 code, data = d.split(b'\0', 1)
68 68 if int(code):
69 69 raise error.LookupError(file, node, data)
70 70 return data
71 71
72 72 return {b'file': file, b'node': node}, decode
73 73
74 74 @wireprotov1peer.batchable
75 75 def x_rfl_getflogheads(self, path):
76 76 if not self.capable(b'x_rfl_getflogheads'):
77 77 raise error.Abort(
78 78 b'configured remotefile server does not '
79 79 b'support getflogheads'
80 80 )
81 81
82 82 def decode(d):
83 83 return d.split(b'\n') if d else []
84 84
85 85 return {b'path': path}, decode
86 86
87 87 def _updatecallstreamopts(self, command, opts):
88 88 if command != b'getbundle':
89 89 return
90 90 if (
91 91 constants.NETWORK_CAP_LEGACY_SSH_GETFILES
92 92 not in self.capabilities()
93 93 ):
94 94 return
95 95 if not hasattr(self, '_localrepo'):
96 96 return
97 97 if (
98 98 constants.SHALLOWREPO_REQUIREMENT
99 99 not in self._localrepo.requirements
100 100 ):
101 101 return
102 102
103 103 bundlecaps = opts.get(b'bundlecaps')
104 104 if bundlecaps:
105 105 bundlecaps = [bundlecaps]
106 106 else:
107 107 bundlecaps = []
108 108
109 109 # shallow, includepattern, and excludepattern are a hacky way of
110 110 # carrying over data from the local repo to this getbundle
111 111 # command. We need to do it this way because bundle1 getbundle
112 112 # doesn't provide any other place we can hook in to manipulate
113 113 # getbundle args before it goes across the wire. Once we get rid
114 114 # of bundle1, we can use bundle2's _pullbundle2extraprepare to
115 115 # do this more cleanly.
116 116 bundlecaps.append(constants.BUNDLE2_CAPABLITY)
117 117 if self._localrepo.includepattern:
118 118 patterns = b'\0'.join(self._localrepo.includepattern)
119 119 includecap = b"includepattern=" + patterns
120 120 bundlecaps.append(includecap)
121 121 if self._localrepo.excludepattern:
122 122 patterns = b'\0'.join(self._localrepo.excludepattern)
123 123 excludecap = b"excludepattern=" + patterns
124 124 bundlecaps.append(excludecap)
125 125 opts[b'bundlecaps'] = b','.join(bundlecaps)
126 126
127 127 def _sendrequest(self, command, args, **opts):
128 128 self._updatecallstreamopts(command, args)
129 129 return super(remotefilepeer, self)._sendrequest(
130 130 command, args, **opts
131 131 )
132 132
133 133 def _callstream(self, command, **opts):
134 134 supertype = super(remotefilepeer, self)
135 135 if not hasattr(supertype, '_sendrequest'):
136 136 self._updatecallstreamopts(command, pycompat.byteskwargs(opts))
137 137 return super(remotefilepeer, self)._callstream(command, **opts)
138 138
139 139 peer.__class__ = remotefilepeer
140 140
141 141
142 142 class cacheconnection:
143 143 """The connection for communicating with the remote cache. Performs
144 144 gets and sets by communicating with an external process that has the
145 145 cache-specific implementation.
146 146 """
147 147
148 148 def __init__(self):
149 149 self.pipeo = self.pipei = self.pipee = None
150 150 self.subprocess = None
151 151 self.connected = False
152 152
153 153 def connect(self, cachecommand):
154 154 if self.pipeo:
155 155 raise error.Abort(_(b"cache connection already open"))
156 156 self.pipei, self.pipeo, self.pipee, self.subprocess = procutil.popen4(
157 157 cachecommand
158 158 )
159 159 self.connected = True
160 160
161 161 def close(self):
162 162 def tryclose(pipe):
163 163 try:
164 164 pipe.close()
165 165 except Exception:
166 166 pass
167 167
168 168 if self.connected:
169 169 try:
170 170 self.pipei.write(b"exit\n")
171 171 except Exception:
172 172 pass
173 173 tryclose(self.pipei)
174 174 self.pipei = None
175 175 tryclose(self.pipeo)
176 176 self.pipeo = None
177 177 tryclose(self.pipee)
178 178 self.pipee = None
179 179 try:
180 180 # Wait for process to terminate, making sure to avoid deadlock.
181 181 # See https://docs.python.org/2/library/subprocess.html for
182 182 # warnings about wait() and deadlocking.
183 183 self.subprocess.communicate()
184 184 except Exception:
185 185 pass
186 186 self.subprocess = None
187 187 self.connected = False
188 188
189 189 def request(self, request, flush=True):
190 190 if self.connected:
191 191 try:
192 192 self.pipei.write(request)
193 193 if flush:
194 194 self.pipei.flush()
195 195 except IOError:
196 196 self.close()
197 197
198 198 def receiveline(self):
199 199 if not self.connected:
200 200 return None
201 201 try:
202 202 result = self.pipeo.readline()[:-1]
203 203 if not result:
204 204 self.close()
205 205 except IOError:
206 206 self.close()
207 207
208 208 return result
209 209
210 210
211 211 def _getfilesbatch(
212 212 remote, receivemissing, progresstick, missed, idmap, batchsize
213 213 ):
214 214 # Over http(s), iterbatch is a streamy method and we can start
215 215 # looking at results early. This means we send one (potentially
216 216 # large) request, but then we show nice progress as we process
217 217 # file results, rather than showing chunks of $batchsize in
218 218 # progress.
219 219 #
220 220 # Over ssh, iterbatch isn't streamy because batch() wasn't
221 221 # explicitly designed as a streaming method. In the future we
222 222 # should probably introduce a streambatch() method upstream and
223 223 # use that for this.
224 224 with remote.commandexecutor() as e:
225 225 futures = []
226 226 for m in missed:
227 227 futures.append(
228 228 e.callcommand(
229 229 b'x_rfl_getfile', {b'file': idmap[m], b'node': m[-40:]}
230 230 )
231 231 )
232 232
233 233 for i, m in enumerate(missed):
234 234 r = futures[i].result()
235 235 futures[i] = None # release memory
236 236 file_ = idmap[m]
237 237 node = m[-40:]
238 238 receivemissing(io.BytesIO(b'%d\n%s' % (len(r), r)), file_, node)
239 239 progresstick()
240 240
241 241
242 242 def _getfiles_optimistic(
243 243 remote, receivemissing, progresstick, missed, idmap, step
244 244 ):
245 245 remote._callstream(b"x_rfl_getfiles")
246 246 i = 0
247 247 pipeo = remote._pipeo
248 248 pipei = remote._pipei
249 249 while i < len(missed):
250 250 # issue a batch of requests
251 251 start = i
252 252 end = min(len(missed), start + step)
253 253 i = end
254 254 for missingid in missed[start:end]:
255 255 # issue new request
256 256 versionid = missingid[-40:]
257 257 file = idmap[missingid]
258 258 sshrequest = b"%s%s\n" % (versionid, file)
259 259 pipeo.write(sshrequest)
260 260 pipeo.flush()
261 261
262 262 # receive batch results
263 263 for missingid in missed[start:end]:
264 264 versionid = missingid[-40:]
265 265 file = idmap[missingid]
266 266 receivemissing(pipei, file, versionid)
267 267 progresstick()
268 268
269 269 # End the command
270 270 pipeo.write(b'\n')
271 271 pipeo.flush()
272 272
273 273
274 274 def _getfiles_threaded(
275 275 remote, receivemissing, progresstick, missed, idmap, step
276 276 ):
277 277 remote._callstream(b"x_rfl_getfiles")
278 278 pipeo = remote._pipeo
279 279 pipei = remote._pipei
280 280
281 281 def writer():
282 282 for missingid in missed:
283 283 versionid = missingid[-40:]
284 284 file = idmap[missingid]
285 285 sshrequest = b"%s%s\n" % (versionid, file)
286 286 pipeo.write(sshrequest)
287 287 pipeo.flush()
288 288
289 289 writerthread = threading.Thread(target=writer)
290 290 writerthread.daemon = True
291 291 writerthread.start()
292 292
293 293 for missingid in missed:
294 294 versionid = missingid[-40:]
295 295 file = idmap[missingid]
296 296 receivemissing(pipei, file, versionid)
297 297 progresstick()
298 298
299 299 writerthread.join()
300 300 # End the command
301 301 pipeo.write(b'\n')
302 302 pipeo.flush()
303 303
304 304
305 305 class fileserverclient:
306 306 """A client for requesting files from the remote file server."""
307 307
308 308 def __init__(self, repo):
309 309 ui = repo.ui
310 310 self.repo = repo
311 311 self.ui = ui
312 312 self.cacheprocess = ui.config(b"remotefilelog", b"cacheprocess")
313 313 if self.cacheprocess:
314 314 self.cacheprocess = util.expandpath(self.cacheprocess)
315 315
316 316 # This option causes remotefilelog to pass the full file path to the
317 317 # cacheprocess instead of a hashed key.
318 318 self.cacheprocesspasspath = ui.configbool(
319 319 b"remotefilelog", b"cacheprocess.includepath"
320 320 )
321 321
322 322 self.debugoutput = ui.configbool(b"remotefilelog", b"debug")
323 323
324 324 self.remotecache = cacheconnection()
325 325
326 326 def setstore(self, datastore, historystore, writedata, writehistory):
327 327 self.datastore = datastore
328 328 self.historystore = historystore
329 329 self.writedata = writedata
330 330 self.writehistory = writehistory
331 331
332 332 def _connect(self):
333 333 return self.repo.connectionpool.get(self.repo.fallbackpath)
334 334
335 335 def request(self, fileids):
336 336 """Takes a list of filename/node pairs and fetches them from the
337 337 server. Files are stored in the local cache.
338 338 A list of nodes that the server couldn't find is returned.
339 339 If the connection fails, an exception is raised.
340 340 """
341 341 if not self.remotecache.connected:
342 342 self.connect()
343 343 cache = self.remotecache
344 344 writedata = self.writedata
345 345
346 346 repo = self.repo
347 347 total = len(fileids)
348 348 request = b"get\n%d\n" % total
349 349 idmap = {}
350 350 reponame = repo.name
351 351 for file, id in fileids:
352 352 fullid = getcachekey(reponame, file, id)
353 353 if self.cacheprocesspasspath:
354 354 request += file + b'\0'
355 355 request += fullid + b"\n"
356 356 idmap[fullid] = file
357 357
358 358 cache.request(request)
359 359
360 360 progress = self.ui.makeprogress(_(b'downloading'), total=total)
361 361 progress.update(0)
362 362
363 363 missed = []
364 364 while True:
365 365 missingid = cache.receiveline()
366 366 if not missingid:
367 367 missedset = set(missed)
368 368 for missingid in idmap:
369 369 if not missingid in missedset:
370 370 missed.append(missingid)
371 371 self.ui.warn(
372 372 _(
373 373 b"warning: cache connection closed early - "
374 374 + b"falling back to server\n"
375 375 )
376 376 )
377 377 break
378 378 if missingid == b"0":
379 379 break
380 380 if missingid.startswith(b"_hits_"):
381 381 # receive progress reports
382 382 parts = missingid.split(b"_")
383 383 progress.increment(int(parts[2]))
384 384 continue
385 385
386 386 missed.append(missingid)
387 387
388 388 global fetchmisses
389 389 fetchmisses += len(missed)
390 390
391 391 fromcache = total - len(missed)
392 392 progress.update(fromcache, total=total)
393 393 self.ui.log(
394 394 b"remotefilelog",
395 395 b"remote cache hit rate is %r of %r\n",
396 396 fromcache,
397 397 total,
398 398 hit=fromcache,
399 399 total=total,
400 400 )
401 401
402 402 oldumask = os.umask(0o002)
403 403 try:
404 404 # receive cache misses from master
405 405 if missed:
406 406 # When verbose is true, sshpeer prints 'running ssh...'
407 407 # to stdout, which can interfere with some command
408 408 # outputs
409 409 verbose = self.ui.verbose
410 410 self.ui.verbose = False
411 411 try:
412 412 with self._connect() as conn:
413 413 remote = conn.peer
414 414 if remote.capable(
415 415 constants.NETWORK_CAP_LEGACY_SSH_GETFILES
416 416 ):
417 417 if not isinstance(remote, _sshv1peer):
418 418 raise error.Abort(
419 419 b'remotefilelog requires ssh servers'
420 420 )
421 421 step = self.ui.configint(
422 422 b'remotefilelog', b'getfilesstep'
423 423 )
424 424 getfilestype = self.ui.config(
425 425 b'remotefilelog', b'getfilestype'
426 426 )
427 427 if getfilestype == b'threaded':
428 428 _getfiles = _getfiles_threaded
429 429 else:
430 430 _getfiles = _getfiles_optimistic
431 431 _getfiles(
432 432 remote,
433 433 self.receivemissing,
434 434 progress.increment,
435 435 missed,
436 436 idmap,
437 437 step,
438 438 )
439 439 elif remote.capable(b"x_rfl_getfile"):
440 440 if remote.capable(b'batch'):
441 441 batchdefault = 100
442 442 else:
443 443 batchdefault = 10
444 444 batchsize = self.ui.configint(
445 445 b'remotefilelog', b'batchsize', batchdefault
446 446 )
447 447 self.ui.debug(
448 448 b'requesting %d files from '
449 449 b'remotefilelog server...\n' % len(missed)
450 450 )
451 451 _getfilesbatch(
452 452 remote,
453 453 self.receivemissing,
454 454 progress.increment,
455 455 missed,
456 456 idmap,
457 457 batchsize,
458 458 )
459 459 else:
460 460 raise error.Abort(
461 461 b"configured remotefilelog server"
462 462 b" does not support remotefilelog"
463 463 )
464 464
465 465 self.ui.log(
466 466 b"remotefilefetchlog",
467 467 b"Success\n",
468 468 fetched_files=progress.pos - fromcache,
469 469 total_to_fetch=total - fromcache,
470 470 )
471 471 except Exception:
472 472 self.ui.log(
473 473 b"remotefilefetchlog",
474 474 b"Fail\n",
475 475 fetched_files=progress.pos - fromcache,
476 476 total_to_fetch=total - fromcache,
477 477 )
478 478 raise
479 479 finally:
480 480 self.ui.verbose = verbose
481 481 # send to memcache
482 482 request = b"set\n%d\n%s\n" % (len(missed), b"\n".join(missed))
483 483 cache.request(request)
484 484
485 485 progress.complete()
486 486
487 487 # mark ourselves as a user of this cache
488 488 writedata.markrepo(self.repo.path)
489 489 finally:
490 490 os.umask(oldumask)
491 491
492 492 def receivemissing(self, pipe, filename, node):
493 493 line = pipe.readline()[:-1]
494 494 if not line:
495 495 raise error.ResponseError(
496 496 _(b"error downloading file contents:"),
497 497 _(b"connection closed early"),
498 498 )
499 499 size = int(line)
500 500 data = pipe.read(size)
501 501 if len(data) != size:
502 502 raise error.ResponseError(
503 503 _(b"error downloading file contents:"),
504 504 _(b"only received %s of %s bytes") % (len(data), size),
505 505 )
506 506
507 507 self.writedata.addremotefilelognode(
508 508 filename, bin(node), zlib.decompress(data)
509 509 )
510 510
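The wire framing consumed above is '<size>\n' followed by exactly size bytes of zlib-compressed blob, the same shape _getfilesbatch fakes with io.BytesIO earlier in this file. A minimal round trip with a made-up payload:

    import io
    import zlib

    blob = zlib.compress(b'file contents for some revision')   # made-up payload
    pipe = io.BytesIO(b'%d\n%s' % (len(blob), blob))            # "<size>\n<bytes>"

    size = int(pipe.readline()[:-1])
    data = pipe.read(size)
    assert len(data) == size
    print(zlib.decompress(data))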
511 511 def connect(self):
512 512 if self.cacheprocess:
513 513 cmd = b"%s %s" % (self.cacheprocess, self.writedata._path)
514 514 self.remotecache.connect(cmd)
515 515 else:
516 516 # If no cache process is specified, we fake one that always
517 517 # returns cache misses. This enables tests to run easily
518 518 # and may eventually allow us to be a drop in replacement
519 519 # for the largefiles extension.
520 520 class simplecache:
521 521 def __init__(self):
522 522 self.missingids = []
523 523 self.connected = True
524 524
525 525 def close(self):
526 526 pass
527 527
528 528 def request(self, value, flush=True):
529 529 lines = value.split(b"\n")
530 530 if lines[0] != b"get":
531 531 return
532 532 self.missingids = lines[2:-1]
533 533 self.missingids.append(b'0')
534 534
535 535 def receiveline(self):
536 536 if len(self.missingids) > 0:
537 537 return self.missingids.pop(0)
538 538 return None
539 539
540 540 self.remotecache = simplecache()
541 541
542 542 def close(self):
543 543 if fetches:
544 544 msg = (
545 545 b"%d files fetched over %d fetches - "
546 546 + b"(%d misses, %0.2f%% hit ratio) over %0.2fs\n"
547 547 ) % (
548 548 fetched,
549 549 fetches,
550 550 fetchmisses,
551 551 float(fetched - fetchmisses) / float(fetched) * 100.0,
552 552 fetchcost,
553 553 )
554 554 if self.debugoutput:
555 555 self.ui.warn(msg)
556 556 self.ui.log(
557 557 b"remotefilelog.prefetch",
558 558 msg.replace(b"%", b"%%"),
559 559 remotefilelogfetched=fetched,
560 560 remotefilelogfetches=fetches,
561 561 remotefilelogfetchmisses=fetchmisses,
562 562 remotefilelogfetchtime=fetchcost * 1000,
563 563 )
564 564
565 565 if self.remotecache.connected:
566 566 self.remotecache.close()
567 567
568 568 def prefetch(
569 569 self, fileids, force=False, fetchdata=True, fetchhistory=False
570 570 ):
571 571 """downloads the given file versions to the cache"""
572 572 repo = self.repo
573 573 idstocheck = []
574 574 for file, id in fileids:
575 575 # hack
576 576 # - we don't use .hgtags
577 577 # - workingctx produces ids with length 42,
578 578 # which we skip since they aren't in any cache
579 579 if (
580 580 file == b'.hgtags'
581 581 or len(id) == 42
582 582 or not repo.shallowmatch(file)
583 583 ):
584 584 continue
585 585
586 586 idstocheck.append((file, bin(id)))
587 587
588 588 datastore = self.datastore
589 589 historystore = self.historystore
590 590 if force:
591 591 datastore = contentstore.unioncontentstore(*repo.shareddatastores)
592 592 historystore = metadatastore.unionmetadatastore(
593 593 *repo.sharedhistorystores
594 594 )
595 595
596 596 missingids = set()
597 597 if fetchdata:
598 598 missingids.update(datastore.getmissing(idstocheck))
599 599 if fetchhistory:
600 600 missingids.update(historystore.getmissing(idstocheck))
601 601
602 602 # partition missing nodes into nullid and not-nullid so we can
603 603 # warn about this filtering potentially shadowing bugs.
604 604 nullids = len(
605 605 [None for unused, id in missingids if id == self.repo.nullid]
606 606 )
607 607 if nullids:
608 608 missingids = [
609 609 (f, id) for f, id in missingids if id != self.repo.nullid
610 610 ]
611 611 repo.ui.develwarn(
612 612 (
613 613 b'remotefilelog not fetching %d null revs'
614 614 b' - this is likely hiding bugs' % nullids
615 615 ),
616 616 config=b'remotefilelog-ext',
617 617 )
618 618 if missingids:
619 619 global fetches, fetched, fetchcost
620 620 fetches += 1
621 621
622 622 # We want to be able to detect excess individual file downloads, so
623 623 # let's log that information for debugging.
624 624 if fetches >= 15 and fetches < 18:
625 625 if fetches == 15:
626 626 fetchwarning = self.ui.config(
627 627 b'remotefilelog', b'fetchwarning'
628 628 )
629 629 if fetchwarning:
630 630 self.ui.warn(fetchwarning + b'\n')
631 631 self.logstacktrace()
632 632 missingids = [(file, hex(id)) for file, id in sorted(missingids)]
633 633 fetched += len(missingids)
634 634 start = time.time()
635 635 missingids = self.request(missingids)
636 636 if missingids:
637 637 raise error.Abort(
638 638 _(b"unable to download %d files") % len(missingids)
639 639 )
640 640 fetchcost += time.time() - start
641 641 self._lfsprefetch(fileids)
642 642
643 643 def _lfsprefetch(self, fileids):
644 if not _lfsmod or not hasattr(self.repo.svfs, b'lfslocalblobstore'):
644 if not _lfsmod or not hasattr(self.repo.svfs, 'lfslocalblobstore'):
645 645 return
646 646 if not _lfsmod.wrapper.candownload(self.repo):
647 647 return
648 648 pointers = []
649 649 store = self.repo.svfs.lfslocalblobstore
650 650 for file, id in fileids:
651 651 node = bin(id)
652 652 rlog = self.repo.file(file)
653 653 if rlog.flags(node) & revlog.REVIDX_EXTSTORED:
654 654 text = rlog.rawdata(node)
655 655 p = _lfsmod.pointer.deserialize(text)
656 656 oid = p.oid()
657 657 if not store.has(oid):
658 658 pointers.append(p)
659 659 if len(pointers) > 0:
660 660 self.repo.svfs.lfsremoteblobstore.readbatch(pointers, store)
661 661 assert all(store.has(p.oid()) for p in pointers)
662 662
663 663 def logstacktrace(self):
664 664 import traceback
665 665
666 666 self.ui.log(
667 667 b'remotefilelog',
668 668 b'excess remotefilelog fetching:\n%s\n',
669 669 b''.join(pycompat.sysbytes(s) for s in traceback.format_stack()),
670 670 )