upstream/ipython Commit - r11584:e45e2d2a

attempt to cache gh_api requests...

MinRK -

r11584:e45e2d2a

parent child

tools/gh_api.py

0 +8 -1

-             """Functions for Github authorisation."""
+             """Functions for Github API requests."""
              from __future__ import print_function
              try:
                  input = raw_input
              except NameError:
                  pass
              import os
              import re
              import sys
              import requests
              import getpass
              import json
+             try:
+                 import requests_cache
+             except ImportError:
+                 print("no cache")
+             else:
+                 requests_cache.install_cache("gh_api")
              # Keyring stores passwords by a 'username', but we're not storing a username and
              # password
              fake_username = 'ipython_tools'
              class Obj(dict):
                  """A ``dict`` whose keys can also be read and assigned as attributes.

                  Used as ``object_hook`` when decoding JSON so that nested API
                  replies can be navigated with dotted access (``pr.head.sha``).
                  """

                  def __getattr__(self, name):
                      # Only invoked when regular attribute lookup fails, so real
                      # dict methods such as ``keys`` still take precedence.
                      if name in self:
                          return self[name]
                      raise AttributeError(name)

                  def __setattr__(self, name, val):
                      # Attribute assignment is stored as a dictionary item.
                      self.__setitem__(name, val)
              token = None
              def get_auth_token():
                  """Return a GitHub OAuth token, obtaining and storing one if needed.

                  Lookup order:

                  1. the module-level ``token`` cache,
                  2. the keyring entry for service ``'github'`` / ``fake_username``,
                  3. an interactive username/password prompt, used once to request a
                     new authorization from the GitHub API; the resulting token is
                     saved to the keyring.

                  Raises whatever ``response.raise_for_status()`` raises when the
                  authorization request fails.
                  """
                  global token

                  # Already resolved earlier in this process.
                  if token is not None:
                      return token

                  # keyring is imported lazily so it is only needed when a token is
                  # actually looked up.
                  import keyring
                  stored = keyring.get_password('github', fake_username)
                  token = stored
                  if stored is not None:
                      return stored

                  print("Please enter your github username and password. These are not "
                         "stored, only used to get an oAuth token. You can revoke this at "
                         "any time on Github.")
                  user = input("Username: ")
                  pw = getpass.getpass("Password: ")

                  requested_scopes = ["public_repo", "gist"]
                  auth_request = {
                    "scopes": requested_scopes,
                    "note": "IPython tools",
                    "note_url": "https://github.com/ipython/ipython/tree/master/tools",
                  }
                  request_body = json.dumps(auth_request)
                  response = requests.post('https://api.github.com/authorizations',
                                          auth=(user, pw), data=request_body)
                  response.raise_for_status()

                  reply = json.loads(response.text)
                  token = reply['token']
                  # Persist the token so later runs skip the prompt.
                  keyring.set_password('github', fake_username, token)
                  return token
              def make_auth_header():
                  """Build the ``Authorization`` header dict carrying the OAuth token."""
                  auth_token = get_auth_token()
                  return {'Authorization': 'token ' + auth_token}
              def post_issue_comment(project, num, body):
                  """Post ``body`` as a new comment on issue ``num`` of ``project``.

                  The request is always authenticated. The HTTP response is not
                  inspected, so a failed post goes unreported.
                  """
                  comment_url = 'https://api.github.com/repos/{project}/issues/{num}/comments'.format(project=project, num=num)
                  requests.post(comment_url,
                                data=json.dumps({'body': body}),
                                headers=make_auth_header())
              def post_gist(content, description='', filename='file', auth=False):
                  """Create a public Gist holding ``content`` and return its HTML URL.

                  ``filename`` names the single file inside the gist. When ``auth`` is
                  true the request carries the OAuth header; otherwise it is sent
                  with no extra headers. Raises via ``raise_for_status()`` on an HTTP
                  error.
                  """
                  gist = {
                    "description": description,
                    "public": True,
                    "files": {filename: {"content": content}},
                  }
                  post_data = json.dumps(gist).encode('utf-8')

                  if auth:
                      headers = make_auth_header()
                  else:
                      headers = {}

                  response = requests.post("https://api.github.com/gists", data=post_data, headers=headers)
                  response.raise_for_status()
                  return json.loads(response.text)['html_url']
              def get_pull_request(project, num, auth=False):
                  """Fetch a single pull request of ``project`` by its number.

                  Returns the decoded JSON reply with every JSON object wrapped in
                  ``Obj``, so fields are reachable as attributes. Pass ``auth=True``
                  to send the OAuth header. Raises via ``raise_for_status()`` on an
                  HTTP error.
                  """
                  url = "https://api.github.com/repos/{project}/pulls/{num}".format(project=project, num=num)
                  header = make_auth_header() if auth else None
                  response = requests.get(url, headers=header)
                  response.raise_for_status()
                  return json.loads(response.text, object_hook=Obj)
              element_pat = re.compile(r'<(.+?)>')
              rel_pat = re.compile(r'rel=[\'"](\w+)[\'"]')
              def get_paged_request(url, headers=None):
                  """Fetch every page of a paginated API listing and return one flat list.

                  Starting at ``url``, each page's JSON items are appended to the
                  result, then the ``next`` entry of the response's links is
                  followed until no such entry remains. Each fetched URL is
                  announced on stderr. Raises via ``raise_for_status()`` on an HTTP
                  error.
                  """
                  collected = []
                  while True:
                      print("fetching %s" % url, file=sys.stderr)
                      page = requests.get(url, headers=headers)
                      page.raise_for_status()
                      collected.extend(page.json())
                      if 'next' not in page.links:
                          # Last page reached.
                          break
                      url = page.links['next']['url']
                  return collected
              def get_pulls_list(project, state="closed", auth=False):
                  """Return all pull requests of ``project`` that are in ``state``.

                  Results are requested 100 per page and every page is collected.
                  Pass ``auth=True`` to send the OAuth header.
                  """
                  url = "https://api.github.com/repos/{project}/pulls?state={state}&per_page=100".format(project=project, state=state)
                  headers = make_auth_header() if auth else None
                  return get_paged_request(url, headers=headers)
              def get_issues_list(project, state="closed", auth=False):
                  """Return all issues of ``project`` that are in ``state``.

                  Results are requested 100 per page and every page is collected.
                  Pass ``auth=True`` to send the OAuth header.

                  This previously queried the ``/pulls`` endpoint (a copy of
                  ``get_pulls_list``); it now queries ``/issues`` as its name says.

                  NOTE(review): the issues endpoint is expected to include pull
                  requests as well -- callers wanting issues only should filter
                  them out; confirm against the GitHub API documentation.
                  """
                  url = "https://api.github.com/repos/{project}/issues?state={state}&per_page=100".format(project=project, state=state)
                  headers = make_auth_header() if auth else None
                  return get_paged_request(url, headers=headers)
              # encode_multipart_formdata is from urllib3.filepost
              # The only change is to iter_fields, to enforce S3's required key ordering
              def iter_fields(fields):
                  """Yield ``(name, value)`` pairs from ``fields`` in upload order.

                  The nine well-known form keys are yielded first, in a fixed order,
                  followed by any remaining entries. ``fields`` itself is not
                  modified; a missing well-known key raises ``KeyError`` when the
                  generator reaches it.
                  """
                  leading_keys = (
                      'key', 'acl', 'Filename', 'success_action_status',
                      'AWSAccessKeyId', 'Policy', 'Signature', 'Content-Type', 'file',
                  )
                  # Work on a copy so popping does not disturb the caller's dict.
                  remaining = fields.copy()
                  for name in leading_keys:
                      yield (name, remaining.pop(name))
                  for pair in remaining.items():
                      yield pair
              def encode_multipart_formdata(fields, boundary=None):
                  """
                  Encode a dictionary of ``fields`` using the multipart/form-data mime format.

                  The fields are written in the order produced by this module's
                  ``iter_fields``, which copies ``fields`` and pops a fixed set of
                  keys from it -- so in practice ``fields`` must be a dictionary
                  containing those keys.

                  :param fields:
                      Dictionary of fields.  The key is treated as the field name,
                      and the value as the body of the form-data bytes. If the value
                      is a tuple of two elements, then the first element is treated
                      as the filename of the form-data section and the second as its
                      data.
                      Field names and filenames must be unicode.
                  :param boundary:
                      If not specified, a boundary is generated with
                      ``choose_boundary`` from ``requests.packages.urllib3.filepost``.
                  :returns:
                      A ``(body, content_type)`` tuple: the encoded request body and
                      the matching ``multipart/form-data`` content-type value that
                      carries the boundary.
                  """
                  # copy requests imports in here:
                  from io import BytesIO
                  from requests.packages.urllib3.filepost import (
                      choose_boundary, six, writer, b, get_content_type
                  )
                  body = BytesIO()
                  if boundary is None:
                      boundary = choose_boundary()
                  for fieldname, value in iter_fields(fields):
                      # Each part starts with the boundary delimiter line.
                      body.write(b('--%s\r\n' % (boundary)))
                      if isinstance(value, tuple):
                          # File part: (filename, data); content type is derived
                          # from the filename.
                          filename, data = value
                          writer(body).write('Content-Disposition: form-data; name="%s"; '
                                             'filename="%s"\r\n' % (fieldname, filename))
                          body.write(b('Content-Type: %s\r\n\r\n' %
                                     (get_content_type(filename))))
                      else:
                          # Plain part: always sent as text/plain.
                          data = value
                          writer(body).write('Content-Disposition: form-data; name="%s"\r\n'
                                             % (fieldname))
                          body.write(b'Content-Type: text/plain\r\n\r\n')
                      if isinstance(data, int):
                          data = str(data)  # Backwards compatibility
                      # Text goes through the writer wrapper; anything else is
                      # written to the byte buffer as-is.
                      if isinstance(data, six.text_type):
                          writer(body).write(data)
                      else:
                          body.write(data)
                      body.write(b'\r\n')
                  # Closing delimiter marks the end of the multipart body.
                  body.write(b('--%s--\r\n' % (boundary)))
                  content_type = b('multipart/form-data; boundary=%s' % boundary)
                  return body.getvalue(), content_type
              def post_download(project, filename, name=None, description=""):
                  """Upload the file at ``filename`` to the downloads area of ``project``.

                  Two requests are made: an authenticated POST to the GitHub
                  downloads API describing the file, then a multipart POST of the
                  file contents to the ``s3_url`` named in GitHub's reply, using the
                  form fields that reply supplies.

                  ``name`` defaults to the basename of ``filename``. Returns the
                  response of the second (upload) request, whose status is not
                  checked here; the first request raises via ``raise_for_status()``
                  on an HTTP error.
                  """
                  if name is None:
                      name = os.path.basename(filename)

                  with open(filename, 'rb') as stream:
                      filedata = stream.read()

                  # Step 1: register the download with GitHub.
                  url = "https://api.github.com/repos/{project}/downloads".format(project=project)
                  description_json = json.dumps(dict(name=name, size=len(filedata),
                                  description=description))
                  response = requests.post(url, data=description_json, headers=make_auth_header())
                  response.raise_for_status()
                  reply = json.loads(response.content)

                  # Step 2: build the upload form from the values GitHub handed back.
                  s3_url = reply['s3_url']
                  fields = {
                      'key': reply['path'],
                      'acl': reply['acl'],
                      'success_action_status': 201,
                      'Filename': reply['name'],
                      'AWSAccessKeyId': reply['accesskeyid'],
                      'Policy': reply['policy'],
                      'Signature': reply['signature'],
                      'file': (reply['name'], filedata),
                  }
                  fields['Content-Type'] = reply['mime_type']

                  data, content_type = encode_multipart_formdata(fields)
                  return requests.post(s3_url, data=data, headers={'Content-Type': content_type})

tools/github_stats.py

0 +33 -42

              #!/usr/bin/env python
              """Simple tools to query github.com and gather stats about issues.
              """
              #-----------------------------------------------------------------------------
              # Imports
              #-----------------------------------------------------------------------------
              from __future__ import print_function
              import json
              import re
              import sys
              from datetime import datetime, timedelta
              from subprocess import check_output
-             from urllib import urlopen
+             from gh_api import get_paged_request, make_auth_header, get_pull_request
              #-----------------------------------------------------------------------------
              # Globals
              #-----------------------------------------------------------------------------
              ISO8601 = "%Y-%m-%dT%H:%M:%SZ"
              PER_PAGE = 100
-             element_pat = re.compile(r'<(.+?)>')
-             rel_pat = re.compile(r'rel=[\'"](\w+)[\'"]')
              #-----------------------------------------------------------------------------
              # Functions
              #-----------------------------------------------------------------------------
-             def parse_link_header(headers):
-                 link_s = headers.get('link', '')
-                 urls = element_pat.findall(link_s)
-                 rels = rel_pat.findall(link_s)
-                 d = {}
-                 for rel,url in zip(rels, urls):
-                     d[rel] = url
-                 return d
-             def get_paged_request(url):
-                 """get a full list, handling APIv3's paging"""
-                 results = []
-                 while url:
-                     print("fetching %s" % url, file=sys.stderr)
-                     f = urlopen(url)
-                     results.extend(json.load(f))
-                     links = parse_link_header(f.headers)
-                     url = links.get('next')
-                 return results
              def get_issues(project="ipython/ipython", state="closed", pulls=False):
                  """Get a list of the issues from the Github API."""
                  which = 'pulls' if pulls else 'issues'
                  url = "https://api.github.com/repos/%s/%s?state=%s&per_page=%i" % (project, which, state, PER_PAGE)
-                 return get_paged_request(url)
+                 return get_paged_request(url, headers=make_auth_header())
              def _parse_datetime(s):
                  """Parse dates in the format returned by the Github API."""
                  if s:
                      return datetime.strptime(s, ISO8601)
                  else:
                      return datetime.fromtimestamp(0)
              def issues2dict(issues):
                  """Index a list of issues by their ``'number'`` field.

                  When two issues share a number, the later one in the list wins.
                  """
                  return dict((issue['number'], issue) for issue in issues)
              def is_pull_request(issue):
                  """Return True if the given issue is a pull request."""
-                 return 'pull_request_url' in issue
+                 return bool(issue.get('pull_request', {}).get('html_url', None))
+             def split_pulls(all_issues, project="ipython/ipython"):
+                 """split a list of closed issues into non-PR Issues and Pull Requests"""
+                 pulls = []
+                 issues = []
+                 for i in all_issues:
+                     if is_pull_request(i):
+                         pull = get_pull_request(project, i['number'], auth=True)
+                         pulls.append(pull)
+                     else:
+                         issues.append(i)
+                 return issues, pulls
-             def issues_closed_since(period=timedelta(days=365), project="ipython/ipython", pulls=False):
+             def issues_closed_since(period=timedelta(days=365), project="ipython/ipython"):
                  """Get all issues closed since a particular point in time. period
-             can either be a datetime object, or a timedelta object. In the
-             latter case, it is used as a time before the present."""
+                 can either be a datetime object, or a timedelta object. In the
+                 latter case, it is used as a time before the present.
+                 """
-                 which = 'pulls' if pulls else 'issues'
+                 which = 'issues'
                  if isinstance(period, timedelta):
-                     period = datetime.now() - period
-                 url = "https://api.github.com/repos/%s/%s?state=closed&sort=updated&since=%s&per_page=%i" % (project, which, period.strftime(ISO8601), PER_PAGE)
-                 allclosed = get_paged_request(url)
-                 # allclosed = get_issues(project=project, state='closed', pulls=pulls, since=period)
-                 filtered = [i for i in allclosed if _parse_datetime(i['closed_at']) > period]
+                     since = datetime.now() - period
+                 else:
+                     since = period
+                 url = "https://api.github.com/repos/%s/%s?state=closed&sort=updated&since=%s&per_page=%i" % (project, which, since.strftime(ISO8601), PER_PAGE)
+                 allclosed = get_paged_request(url, headers=make_auth_header())
-                 # exclude rejected PRs
-                 if pulls:
-                     filtered = [ pr for pr in filtered if pr['merged_at'] ]
+                 issues, pulls = split_pulls(allclosed, project=project)
+                 issues = [i for i in issues if _parse_datetime(i['closed_at']) > since]
+                 pulls = [p for p in pulls if p['merged_at'] and _parse_datetime(p['merged_at']) > since]
-                 return filtered
+                 return issues, pulls
              def sorted_by_field(issues, field='closed_at', reverse=False):
                  """Return a new list of issues sorted by ``field``.

                  ``field`` defaults to the closing date; ``reverse=True`` sorts in
                  descending order.
                  """
                  def field_value(issue):
                      return issue[field]

                  return sorted(issues, key=field_value, reverse=reverse)
              def report(issues, show_urls=False):
                  """Print one bullet line per issue, giving its number and title.

                  With ``show_urls`` each line uses a reST role: ``ghpull`` when the
                  entry has a ``'merged_at'`` key, ``ghissue`` otherwise. Without it
                  the line is just the number and title.
                  """
                  # titles may have unicode in them, so they are encoded as UTF-8
                  # before being formatted into the output line
                  for entry in issues:
                      if show_urls:
                          role = 'ghpull' if 'merged_at' in entry else 'ghissue'
                          line = '* :%s:`%d`: %s' % (role, entry['number'],
                                                      entry['title'].encode('utf-8'))
                      else:
                          line = '* %d: %s' % (entry['number'], entry['title'].encode('utf-8'))
                      print(line)
              #-----------------------------------------------------------------------------
              # Main script
              #-----------------------------------------------------------------------------
              if __name__ == "__main__":
                  # Whether to add reST urls for all issues in printout.
                  show_urls = True
                  # By default, search one month back
                  tag = None
                  if len(sys.argv) > 1:
                      try:
                          days = int(sys.argv[1])
                      except:
                          tag = sys.argv[1]
                  else:
                      tag = check_output(['git', 'describe', '--abbrev=0']).strip()
                  if tag:
                      cmd = ['git', 'log', '-1', '--format=%ai', tag]
                      tagday, tz = check_output(cmd).strip().rsplit(' ', 1)
-                     since = datetime.strptime(tagday, "%Y-%m-%d %H:%M:%S")
+                     since = datetime.strptime(tagday, "%Y-%m-%d %H:%M:%S")# - timedelta(days=30 * 6)
                  else:
                      since = datetime.now() - timedelta(days=days)
                  print("fetching GitHub stats since %s (tag: %s)" % (since, tag), file=sys.stderr)
                  # turn off to play interactively without redownloading, use %run -i
                  if 1:
-                     issues = issues_closed_since(since, pulls=False)
-                     pulls = issues_closed_since(since, pulls=True)
+                     issues, pulls = issues_closed_since(since)
                  # For regular reports, it's nice to show them in reverse chronological order
                  issues = sorted_by_field(issues, reverse=True)
                  pulls = sorted_by_field(pulls, reverse=True)
                  n_issues, n_pulls = map(len, (issues, pulls))
                  n_total = n_issues + n_pulls
                  # Print summary report we can directly include into release notes.
                  print()
                  since_day = since.strftime("%Y/%m/%d")
                  today = datetime.today().strftime("%Y/%m/%d")
                  print("GitHub stats for %s - %s (tag: %s)" % (since_day, today, tag))
                  print()
                  print("These lists are automatically generated, and may be incomplete or contain duplicates.")
                  print()
                  if tag:
                      # print git info, in addition to GitHub info:
                      since_tag = tag+'..'
                      cmd = ['git', 'log', '--oneline', since_tag]
                      ncommits = len(check_output(cmd).splitlines())
                      author_cmd = ['git', 'log', '--format=* %aN', since_tag]
                      all_authors = check_output(author_cmd).splitlines()
                      unique_authors = sorted(set(all_authors))
                      print("The following %i authors contributed %i commits." % (len(unique_authors), ncommits))
                      print()
                      print('\n'.join(unique_authors))
                      print()
                  print()
                  print("We closed a total of %d issues, %d pull requests and %d regular issues;\n"
                        "this is the full list (generated with the script \n"
                        ":file:`tools/github_stats.py`):" % (n_total, n_pulls, n_issues))
                  print()
                  print('Pull Requests (%d):\n' % n_pulls)
                  report(pulls, show_urls)
                  print()
                  print('Issues (%d):\n' % n_issues)
                  report(issues, show_urls)

General Comments 0

Write
Preview

You need to be logged in to leave comments. Login now

No TODOs yet

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages