# HG changeset patch
# User Mads Kiilerich
# Date 2020-12-18 21:03:10
# Node ID 4a18e6bf6b877ec1a06d302076b546fa2c53ecad
# Parent a36a8804e7beba18b3bcbc8c97e2af96d508a4bd
model: simplify how get_commits_stats task group on author

Avoid using the caching h.person . We want to get rid of the model dependency
on helpers.

The stats are persisted, and any temporary incorrectness in the long term
cached h.person will thus remain forever. It is thus arguably better to avoid
using it in this place.

get_commits_stats is also a long running task, so speed is not *that*
critical. And generally, processing commits in order will have a lot of the
same committers, so a local cache will have a good hit rate.

(Alternatively, h.person could perhaps be in user model ... but that's not how
it is now.)

diff --git a/kallithea/model/async_tasks.py b/kallithea/model/async_tasks.py
--- a/kallithea/model/async_tasks.py
+++ b/kallithea/model/async_tasks.py
@@ -40,11 +40,10 @@ import celery.utils.log
 from tg import config
 
 import kallithea
-import kallithea.lib.helpers as h
 from kallithea.lib import celerylib, conf, ext_json, hooks
 from kallithea.lib.indexers.daemon import WhooshIndexingDaemon
 from kallithea.lib.utils2 import asbool, ascii_bytes
-from kallithea.lib.vcs.utils import author_email
+from kallithea.lib.vcs.utils import author_email, author_name
 from kallithea.model import db, repo, userlog
 
 
@@ -66,6 +65,19 @@ def whoosh_index(repo_location, full_ind
                          .run(full_index=full_index)
 
 
+def _author_username(author):
+    """Return the username of the user identified by the email part of the 'author' string,
+    default to the name or email.
+    Kind of similar to h.person() ."""
+    email = author_email(author)
+    if email:
+        user = db.User.get_by_email(email)
+        if user is not None:
+            return user.username
+    # Still nothing? Just pass back the author name if any, else the email
+    return author_name(author) or email
+
+
 @celerylib.task
 @celerylib.dbsession
 def get_commits_stats(repo_name, ts_min_y, ts_max_y, recurse_limit=100):
@@ -124,13 +136,19 @@ def get_commits_stats(repo_name, ts_min_
         log.debug('Getting revisions from %s to %s',
              last_rev, last_rev + parse_limit
         )
+        usernames_cache = {}
         for cs in scm_repo[last_rev:last_rev + parse_limit]:
             log.debug('parsing %s', cs)
             last_cs = cs  # remember last parsed changeset
             tt = cs.date.timetuple()
             k = mktime(tt[:3] + (0, 0, 0, 0, 0, 0))
 
-            username = h.person(cs.author)
+            # get username from author - similar to what h.person does
+            username = usernames_cache.get(cs.author)
+            if username is None:
+                username = _author_username(cs.author)
+                usernames_cache[cs.author] = username
+
             if username in co_day_auth_aggr:
                 try:
                     l = [timegetter(x) for x in
diff --git a/scripts/deps.py b/scripts/deps.py
--- a/scripts/deps.py
+++ b/scripts/deps.py
@@ -158,7 +158,6 @@ known_violations = [
 ('kallithea.lib.utils', 'kallithea.model'),  # clean up utils
 ('kallithea.lib.utils', 'kallithea.model.db'),
 ('kallithea.lib.utils', 'kallithea.model.scm'),
-('kallithea.model.async_tasks', 'kallithea.lib.helpers'),
 ('kallithea.model.async_tasks', 'kallithea.lib.hooks'),
 ('kallithea.model.async_tasks', 'kallithea.lib.indexers'),
 ('kallithea.model.async_tasks', 'kallithea.model'),