##// END OF EJS Templates
model: simplify how get_commits_stats task group on author...
Mads Kiilerich -
r8595:4a18e6bf default
parent child Browse files
Show More
@@ -1,509 +1,527 b''
1 1 # -*- coding: utf-8 -*-
2 2 # This program is free software: you can redistribute it and/or modify
3 3 # it under the terms of the GNU General Public License as published by
4 4 # the Free Software Foundation, either version 3 of the License, or
5 5 # (at your option) any later version.
6 6 #
7 7 # This program is distributed in the hope that it will be useful,
8 8 # but WITHOUT ANY WARRANTY; without even the implied warranty of
9 9 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 10 # GNU General Public License for more details.
11 11 #
12 12 # You should have received a copy of the GNU General Public License
13 13 # along with this program. If not, see <http://www.gnu.org/licenses/>.
14 14 """
15 15 kallithea.model.async_tasks
16 16 ~~~~~~~~~~~~~~~~~~~~~~~~~~~
17 17
Kallithea task modules, containing all tasks that are supposed to be run
by the celery daemon
20 20
21 21 This file was forked by the Kallithea project in July 2014.
22 22 Original author and date, and relevant copyright and licensing information is below:
23 23 :created_on: Oct 6, 2010
24 24 :author: marcink
25 25 :copyright: (c) 2013 RhodeCode GmbH, and others.
26 26 :license: GPLv3, see LICENSE.md for more details.
27 27 """
28 28
29 29 import email.message
30 30 import email.utils
31 31 import os
32 32 import smtplib
33 33 import time
34 34 import traceback
35 35 from collections import OrderedDict
36 36 from operator import itemgetter
37 37 from time import mktime
38 38
39 39 import celery.utils.log
40 40 from tg import config
41 41
42 42 import kallithea
43 import kallithea.lib.helpers as h
44 43 from kallithea.lib import celerylib, conf, ext_json, hooks
45 44 from kallithea.lib.indexers.daemon import WhooshIndexingDaemon
46 45 from kallithea.lib.utils2 import asbool, ascii_bytes
47 from kallithea.lib.vcs.utils import author_email
46 from kallithea.lib.vcs.utils import author_email, author_name
48 47 from kallithea.model import db, repo, userlog
49 48
50 49
# Public task API of this module; other names are implementation details.
__all__ = ['whoosh_index', 'get_commits_stats', 'send_email']


# Task-aware logger so log records carry celery task context.
log = celery.utils.log.get_task_logger(__name__)
56 55
@celerylib.task
@celerylib.locked_task
@celerylib.dbsession
def whoosh_index(repo_location, full_index):
    """Run a Whoosh indexing pass over the repositories under repo_location.

    :param repo_location: base path of the repositories to index
    :param full_index: when true, rebuild the index from scratch instead of
        doing an incremental update
    """
    celerylib.get_session()  # initialize database connection

    daemon = WhooshIndexingDaemon(
        index_location=config['index_dir'],
        repo_location=repo_location,
    )
    daemon.run(full_index=full_index)
67 66
68 67
def _author_username(author):
    """Return the username of the user identified by the email part of the 'author' string,
    default to the name or email.
    Kind of similar to h.person() ."""
    addr = author_email(author)
    user = db.User.get_by_email(addr) if addr else None
    if user is not None:
        return user.username
    # No matching user - fall back to the author name if any, else the email.
    return author_name(author) or addr
79
80
@celerylib.task
@celerylib.dbsession
def get_commits_stats(repo_name, ts_min_y, ts_max_y, recurse_limit=100):
    """Parse a batch of changesets of repo_name and update its commit statistics.

    Aggregates per-author and per-day commit activity between the timestamps
    ts_min_y and ts_max_y and stores the JSON-encoded result in the Statistics
    table. Only up to 'commit_parse_limit' changesets are parsed per call; if
    more remain, the task re-invokes itself (bounded by recurse_limit) to
    continue from the stored revision marker.

    Uses a filesystem DaemonLock so only one instance runs per repo/time-range;
    returns a message string if the lock is already held.
    """
    DBS = celerylib.get_session()
    lockkey = celerylib.__get_lockkey('get_commits_stats', repo_name, ts_min_y,
                                      ts_max_y)
    lockkey_path = config.get('cache_dir') or config['app_conf']['cache_dir'] # Backward compatibility for TurboGears < 2.4

    log.info('running task with lockkey %s', lockkey)

    try:
        lock = celerylib.DaemonLock(os.path.join(lockkey_path, lockkey))

        co_day_auth_aggr = {}
        commits_by_day_aggregate = {}
        db_repo = db.Repository.get_by_repo_name(repo_name)
        if db_repo is None:
            # repo no longer exists - nothing to do (lock is left to time out)
            return True

        scm_repo = db_repo.scm_instance
        repo_size = scm_repo.count()
        # return if repo have no revisions
        if repo_size < 1:
            lock.release()
            return True

        skip_date_limit = True
        parse_limit = int(config.get('commit_parse_limit'))
        last_rev = None
        last_cs = None
        timegetter = itemgetter('time')

        dbrepo = DBS.query(db.Repository) \
            .filter(db.Repository.repo_name == repo_name).scalar()
        cur_stats = DBS.query(db.Statistics) \
            .filter(db.Statistics.repository == dbrepo).scalar()

        if cur_stats is not None:
            last_rev = cur_stats.stat_on_revision

        if last_rev == scm_repo.get_changeset().revision and repo_size > 1:
            # pass silently without any work if we're not on first revision or
            # current state of parsing revision(from db marker) is the
            # last revision
            lock.release()
            return True

        if cur_stats:
            # resume from the previously stored aggregates
            commits_by_day_aggregate = OrderedDict(ext_json.loads(
                cur_stats.commit_activity_combined))
            co_day_auth_aggr = ext_json.loads(cur_stats.commit_activity)

        log.debug('starting parsing %s', parse_limit)

        last_rev = last_rev + 1 if last_rev and last_rev >= 0 else 0
        log.debug('Getting revisions from %s to %s',
                  last_rev, last_rev + parse_limit
        )
        # cache author -> username lookups; the same author usually repeats
        usernames_cache = {}
        for cs in scm_repo[last_rev:last_rev + parse_limit]:
            log.debug('parsing %s', cs)
            last_cs = cs # remember last parsed changeset
            tt = cs.date.timetuple()
            # k is the midnight timestamp of the changeset's day
            k = mktime(tt[:3] + (0, 0, 0, 0, 0, 0))

            # get username from author - similar to what h.person does
            username = usernames_cache.get(cs.author)
            if username is None:
                username = _author_username(cs.author)
                usernames_cache[cs.author] = username

            if username in co_day_auth_aggr:
                try:
                    # find existing datapoint for this author on this day
                    l = [timegetter(x) for x in
                         co_day_auth_aggr[username]['data']]
                    time_pos = l.index(k)
                except ValueError:
                    time_pos = None

                if time_pos is not None and time_pos >= 0:
                    datadict = \
                        co_day_auth_aggr[username]['data'][time_pos]

                    datadict["commits"] += 1
                    datadict["added"] += len(cs.added)
                    datadict["changed"] += len(cs.changed)
                    datadict["removed"] += len(cs.removed)

                else:
                    # new day for a known author
                    if k >= ts_min_y and k <= ts_max_y or skip_date_limit:

                        datadict = {"time": k,
                                    "commits": 1,
                                    "added": len(cs.added),
                                    "changed": len(cs.changed),
                                    "removed": len(cs.removed),
                                    }
                        co_day_auth_aggr[username]['data'] \
                            .append(datadict)

            else:
                # first datapoint for this author
                if k >= ts_min_y and k <= ts_max_y or skip_date_limit:
                    co_day_auth_aggr[username] = {
                        "label": username,
                        "data": [{"time": k,
                                  "commits": 1,
                                  "added": len(cs.added),
                                  "changed": len(cs.changed),
                                  "removed": len(cs.removed),
                                  }],
                        "schema": ["commits"],
                    }

            # gather all data by day
            if k in commits_by_day_aggregate:
                commits_by_day_aggregate[k] += 1
            else:
                commits_by_day_aggregate[k] = 1

        overview_data = sorted(commits_by_day_aggregate.items(),
                               key=itemgetter(0))

        stats = cur_stats if cur_stats else db.Statistics()
        stats.commit_activity = ascii_bytes(ext_json.dumps(co_day_auth_aggr))
        stats.commit_activity_combined = ascii_bytes(ext_json.dumps(overview_data))

        log.debug('last revision %s', last_rev)
        leftovers = len(scm_repo.revisions[last_rev:])
        log.debug('revisions to parse %s', leftovers)

        if last_rev == 0 or leftovers < parse_limit:
            # final batch - also compute the language statistics
            log.debug('getting code trending stats')
            stats.languages = ascii_bytes(ext_json.dumps(__get_codes_stats(repo_name)))

        try:
            stats.repository = dbrepo
            stats.stat_on_revision = last_cs.revision if last_cs else 0
            DBS.add(stats)
            DBS.commit()
        except:
            # NOTE(review): bare except - consider narrowing to Exception
            log.error(traceback.format_exc())
            DBS.rollback()
            lock.release()
            return False

        # final release
        lock.release()

        # execute another task if celery is enabled
        if len(scm_repo.revisions) > 1 and kallithea.CELERY_APP and recurse_limit > 0:
            get_commits_stats(repo_name, ts_min_y, ts_max_y, recurse_limit - 1)
        elif recurse_limit <= 0:
            log.debug('Not recursing - limit has been reached')
        else:
            log.debug('Not recursing')
    except celerylib.LockHeld:
        log.info('Task with key %s already running', lockkey)
        return 'Task with key %s already running' % lockkey
221 239
222 240
@celerylib.task
@celerylib.dbsession
def send_email(recipients, subject, body='', html_body='', headers=None, from_name=None):
    """
    Sends an email with defined parameters from the .ini files.

    :param recipients: list of recipients, if this is None, the defined email
        address from field 'email_to' and all admins is used instead
    :param subject: subject of the mail
    :param body: plain text body of the mail
    :param html_body: html version of body
    :param headers: dictionary of prepopulated e-mail headers
    :param from_name: full name to be used as sender of this mail - often a
        .full_name_or_username value

    :returns: True on success, False when mail could not be sent.
    """
    assert isinstance(recipients, list), recipients
    if headers is None:
        headers = {}
    else:
        # do not modify the original headers object passed by the caller
        headers = headers.copy()

    email_config = config
    email_prefix = email_config.get('email_prefix', '')
    if email_prefix:
        subject = "%s %s" % (email_prefix, subject)

    if not recipients:
        # if recipients are not defined we send to email_config + all admins
        recipients = [u.email for u in db.User.query()
                      .filter(db.User.admin == True).all()]
        if email_config.get('email_to') is not None:
            recipients += email_config.get('email_to').split(',')

        # If there are still no recipients, there are no admins and no address
        # configured in email_to, so return.
        if not recipients:
            log.error("No recipients specified and no fallback available.")
            return False

        log.warning("No recipients specified for '%s' - sending to admins %s", subject, ' '.join(recipients))

    # SMTP sender
    app_email_from = email_config.get('app_email_from', 'Kallithea')
    # 'From' header
    if from_name is not None:
        # set From header based on from_name but with a generic e-mail address
        # In case app_email_from is in "Some Name <e-mail>" format, we first
        # extract the e-mail address.
        envelope_addr = author_email(app_email_from)
        headers['From'] = '"%s" <%s>' % (
            email.utils.quote('%s (no-reply)' % from_name),
            envelope_addr)

    smtp_server = email_config.get('smtp_server')
    smtp_port = email_config.get('smtp_port')
    smtp_use_tls = asbool(email_config.get('smtp_use_tls'))
    smtp_use_ssl = asbool(email_config.get('smtp_use_ssl'))
    smtp_auth = email_config.get('smtp_auth') # undocumented - overrule automatic choice of auth mechanism
    smtp_username = email_config.get('smtp_username')
    smtp_password = email_config.get('smtp_password')

    logmsg = ("Mail details:\n"
              "recipients: %s\n"
              "headers: %s\n"
              "subject: %s\n"
              "body:\n%s\n"
              "html:\n%s\n"
              % (' '.join(recipients), headers, subject, body, html_body))

    if smtp_server:
        log.debug("Sending e-mail. " + logmsg)
    else:
        log.error("SMTP mail server not configured - cannot send e-mail.")
        log.warning(logmsg)
        return False

    msg = email.message.EmailMessage()
    msg['Subject'] = subject
    msg['From'] = app_email_from # fallback - might be overridden by a header
    msg['To'] = ', '.join(recipients)
    msg['Date'] = email.utils.formatdate(time.time())

    for key, value in headers.items():
        del msg[key] # Delete key first to make sure add_header will replace header (if any), no matter the casing
        msg.add_header(key, value)

    # plain text part plus html alternative
    msg.set_content(body)
    msg.add_alternative(html_body, subtype='html')

    try:
        if smtp_use_ssl:
            smtp_serv = smtplib.SMTP_SSL(smtp_server, smtp_port)
        else:
            smtp_serv = smtplib.SMTP(smtp_server, smtp_port)

        if smtp_use_tls:
            smtp_serv.starttls()

        if smtp_auth:
            smtp_serv.ehlo() # populate esmtp_features
            smtp_serv.esmtp_features["auth"] = smtp_auth

        if smtp_username and smtp_password is not None:
            smtp_serv.login(smtp_username, smtp_password)

        smtp_serv.sendmail(app_email_from, recipients, msg.as_string())
        smtp_serv.quit()

        log.info('Mail was sent to: %s' % recipients)
    except:
        # NOTE(review): bare except - any SMTP failure is logged and swallowed
        log.error('Mail sending failed')
        log.error(traceback.format_exc())
        return False
    return True
338 356
339 357
@celerylib.task
@celerylib.dbsession
def create_repo(form_data, cur_user):
    """Create a new repository (database record and filesystem repo) owned by cur_user.

    :param form_data: validated form dict with repo settings (repo_name,
        repo_name_full, repo_type, repo_group, ...)
    :param cur_user: user (or user identifier) that becomes the repo owner

    On any failure, the database record is deleted again and the filesystem
    repo removed before the exception is re-raised, so no half-created
    repository is left behind.
    """
    DBS = celerylib.get_session()

    cur_user = db.User.guess_instance(cur_user)

    owner = cur_user
    repo_name = form_data['repo_name']
    repo_name_full = form_data['repo_name_full']
    repo_type = form_data['repo_type']
    description = form_data['repo_description']
    private = form_data['repo_private']
    clone_uri = form_data.get('clone_uri')
    repo_group = form_data['repo_group']
    landing_rev = form_data['repo_landing_rev']
    copy_fork_permissions = form_data.get('copy_permissions')
    copy_group_permissions = form_data.get('repo_copy_permissions')
    fork_of = form_data.get('fork_parent_id')
    state = form_data.get('repo_state', db.Repository.STATE_PENDING)

    # repo creation defaults, private and repo_type are filled in form
    defs = db.Setting.get_default_repo_settings(strip_prefix=True)
    enable_statistics = defs.get('repo_enable_statistics')
    enable_downloads = defs.get('repo_enable_downloads')

    try:
        # create the database record first ...
        db_repo = repo.RepoModel()._create_repo(
            repo_name=repo_name_full,
            repo_type=repo_type,
            description=description,
            owner=owner,
            private=private,
            clone_uri=clone_uri,
            repo_group=repo_group,
            landing_rev=landing_rev,
            fork_of=fork_of,
            copy_fork_permissions=copy_fork_permissions,
            copy_group_permissions=copy_group_permissions,
            enable_statistics=enable_statistics,
            enable_downloads=enable_downloads,
            state=state
        )

        userlog.action_logger(cur_user, 'user_created_repo',
                              form_data['repo_name_full'], '')

        DBS.commit()
        # now create this repo on Filesystem
        repo.RepoModel()._create_filesystem_repo(
            repo_name=repo_name,
            repo_type=repo_type,
            repo_group=db.RepoGroup.guess_instance(repo_group),
            clone_uri=clone_uri,
        )
        db_repo = db.Repository.get_by_repo_name(repo_name_full)
        hooks.log_create_repository(db_repo.get_dict(), created_by=owner.username)

        # update repo changeset caches initially
        db_repo.update_changeset_cache()

        # set new created state
        db_repo.set_state(db.Repository.STATE_CREATED)
        DBS.commit()
    except Exception as e:
        # Fixed log message: this task creates a repository - the previous
        # text said "forking", copy-pasted from create_repo_fork.
        log.warning('Exception %s occurred when creating repository, '
                    'doing cleanup...', e)
        # rollback things manually !
        db_repo = db.Repository.get_by_repo_name(repo_name_full)
        if db_repo:
            db.Repository.delete(db_repo.repo_id)
            DBS.commit()
            repo.RepoModel()._delete_filesystem_repo(db_repo)
        raise

    return True
416 434
417 435
@celerylib.task
@celerylib.dbsession
def create_repo_fork(form_data, cur_user):
    """
    Creates a fork of repository using interval VCS methods

    :param form_data: validated form dict with fork settings (repo_name,
        repo_name_full, fork_parent_id, ...)
    :param cur_user: user (or user identifier) that becomes the fork owner

    On any failure the database record is deleted again and the filesystem
    repo removed before the exception is re-raised.
    """
    DBS = celerylib.get_session()

    base_path = kallithea.CONFIG['base_path']
    cur_user = db.User.guess_instance(cur_user)

    repo_name = form_data['repo_name'] # fork in this case
    repo_name_full = form_data['repo_name_full']

    repo_type = form_data['repo_type']
    owner = cur_user
    private = form_data['private']
    clone_uri = form_data.get('clone_uri')
    repo_group = form_data['repo_group']
    landing_rev = form_data['landing_rev']
    copy_fork_permissions = form_data.get('copy_permissions')

    try:
        fork_of = db.Repository.guess_instance(form_data.get('fork_parent_id'))

        # create the database record first ...
        repo.RepoModel()._create_repo(
            repo_name=repo_name_full,
            repo_type=repo_type,
            description=form_data['description'],
            owner=owner,
            private=private,
            clone_uri=clone_uri,
            repo_group=repo_group,
            landing_rev=landing_rev,
            fork_of=fork_of,
            copy_fork_permissions=copy_fork_permissions
        )
        userlog.action_logger(cur_user, 'user_forked_repo:%s' % repo_name_full,
                              fork_of.repo_name, '')
        DBS.commit()

        # the fork is cloned from the parent's local path
        source_repo_path = os.path.join(base_path, fork_of.repo_name)

        # now create this repo on Filesystem
        repo.RepoModel()._create_filesystem_repo(
            repo_name=repo_name,
            repo_type=repo_type,
            repo_group=db.RepoGroup.guess_instance(repo_group),
            clone_uri=source_repo_path,
        )
        db_repo = db.Repository.get_by_repo_name(repo_name_full)
        hooks.log_create_repository(db_repo.get_dict(), created_by=owner.username)

        # update repo changeset caches initially
        db_repo.update_changeset_cache()

        # set new created state
        db_repo.set_state(db.Repository.STATE_CREATED)
        DBS.commit()
    except Exception as e:
        log.warning('Exception %s occurred when forking repository, '
                    'doing cleanup...' % e)
        # rollback things manually !
        db_repo = db.Repository.get_by_repo_name(repo_name_full)
        if db_repo:
            db.Repository.delete(db_repo.repo_id)
            DBS.commit()
            repo.RepoModel()._delete_filesystem_repo(db_repo)
        raise

    return True
492 510
493 511
def __get_codes_stats(repo_name):
    """Walk the tip changeset of repo_name and count files per known
    (non-binary) language extension. Returns a dict of extension -> count."""
    tip = db.Repository.get_by_repo_name(repo_name).scm_instance.get_changeset()

    counts = {}
    for _topnode, _dirnodes, filenodes in tip.walk('/'):
        for node in filenodes:
            ext = node.extension.lower()
            if ext in conf.LANGUAGES_EXTENSIONS_MAP and not node.is_binary:
                counts[ext] = counts.get(ext, 0) + 1

    return counts or {}
@@ -1,294 +1,293 b''
1 1 #!/usr/bin/env python3
2 2
3 3
4 4 import re
5 5 import sys
6 6
7 7
# Modules whose imports are ignored entirely (stdlib and misc third-party).
ignored_modules = set('''
argparse
base64
bcrypt
binascii
bleach
calendar
celery
celery
chardet
click
collections
configparser
copy
csv
ctypes
datetime
dateutil
decimal
decorator
difflib
distutils
docutils
email
errno
fileinput
functools
getpass
grp
hashlib
hmac
html
http
imp
importlib
inspect
io
ipaddr
IPython
isapi_wsgi
itertools
json
kajiki
ldap
logging
mako
markdown
mimetypes
mock
msvcrt
multiprocessing
operator
os
paginate
paginate_sqlalchemy
pam
paste
pkg_resources
platform
posixpath
pprint
pwd
pyflakes
pytest
pytest_localserver
random
re
routes
setuptools
shlex
shutil
smtplib
socket
ssl
stat
string
struct
subprocess
sys
tarfile
tempfile
textwrap
tgext
threading
time
traceback
traitlets
types
urllib
urlobject
uuid
warnings
webhelpers2
webob
webtest
whoosh
win32traceutil
zipfile
'''.split())

# Modules rendered in the top row of the generated graph.
top_modules = set('''
kallithea.alembic
kallithea.bin
kallithea.config
kallithea.controllers
kallithea.templates.py
scripts
'''.split())

# External dependencies shown in the bottom row of the graph.
bottom_external_modules = set('''
tg
mercurial
sqlalchemy
alembic
formencode
pygments
dulwich
beaker
psycopg2
docs
setup
conftest
'''.split())

# Internal modules shown as ordinary nodes.
normal_modules = set('''
kallithea
kallithea.controllers.base
kallithea.lib
kallithea.lib.auth
kallithea.lib.auth_modules
kallithea.lib.celerylib
kallithea.lib.db_manage
kallithea.lib.helpers
kallithea.lib.hooks
kallithea.lib.indexers
kallithea.lib.utils
kallithea.lib.utils2
kallithea.lib.vcs
kallithea.lib.webutils
kallithea.model
kallithea.model.async_tasks
kallithea.model.scm
kallithea.templates.py
'''.split())

shown_modules = normal_modules | top_modules

# break the chains somehow - this is a cleanup TODO list
known_violations = [
    ('kallithea.lib.auth_modules', 'kallithea.lib.auth'), # needs base&facade
    ('kallithea.lib.utils', 'kallithea.model'), # clean up utils
    ('kallithea.lib.utils', 'kallithea.model.db'),
    ('kallithea.lib.utils', 'kallithea.model.scm'),
    ('kallithea.model.async_tasks', 'kallithea.lib.hooks'),
    ('kallithea.model.async_tasks', 'kallithea.lib.indexers'),
    ('kallithea.model.async_tasks', 'kallithea.model'),
    ('kallithea.model', 'kallithea.lib.auth'), # auth.HasXXX
    ('kallithea.model', 'kallithea.lib.auth_modules'), # validators
    ('kallithea.model', 'kallithea.lib.hooks'), # clean up hooks
    ('kallithea.model', 'kallithea.model.scm'),
    ('kallithea.model.scm', 'kallithea.lib.hooks'),
]

# Edges that exist at runtime but are not visible as imports.
extra_edges = [
    ('kallithea.config', 'kallithea.controllers'), # through TG
    ('kallithea.lib.auth', 'kallithea.lib.auth_modules'), # custom loader
]
177 176
def normalize(s):
    """Given a string with dot path, return the string it should be shown as.

    Returns None for modules that should be hidden from the graph."""
    parts = s.replace('.__init__', '').split('.')
    top = parts[0]
    prefix2 = '.'.join(parts[:2])
    prefix3 = '.'.join(parts[:3])
    prefix4 = '.'.join(parts[:4])

    # Special-cased names first.
    if top in ('scripts', 'contributor_data', 'i18n_utils'):
        return 'scripts'
    if prefix3 == 'kallithea.model.meta':
        return 'kallithea.model.db'
    if parts[:4] == ['kallithea', 'lib', 'vcs', 'ssh']:
        return 'kallithea.lib.vcs.ssh'

    # Longest shown prefix wins.
    for candidate in (prefix4, prefix3, prefix2):
        if candidate in shown_modules:
            return candidate

    if prefix2 == 'kallithea.tests':
        return None
    if top in ignored_modules:
        return None
    assert top in bottom_external_modules, parts
    return top
202 201
203 202
def main(filenames):
    """Scan the given Python files for imports, write deps.txt and deps.dot,
    and verify that the dependency graph (minus known violations) is acyclic."""
    if not filenames or filenames[0].startswith('-'):
        print('''\
Usage:
    hg files 'set:!binary()&grep("^#!.*python")' 'set:**.py' | xargs scripts/deps.py
    dot -Tsvg deps.dot > deps.svg
''')
        raise SystemExit(1)

    files_imports = dict() # map filenames to its imports
    import_deps = set() # set of tuples with module name and its imports
    for fn in filenames:
        with open(fn) as f:
            s = f.read()

        # module dotted name derived from the file path
        dot_name = (fn[:-3] if fn.endswith('.py') else fn).replace('/', '.')
        file_imports = set()
        # matches 'from X import a, b', 'from X import (a, b)' and 'import X'
        for m in re.finditer(r'^ *(?:from ([^ ]*) import (?:([a-zA-Z].*)|\(([^)]*)\))|import (.*))$', s, re.MULTILINE):
            m_from, m_from_import, m_from_import2, m_import = m.groups()
            if m_from:
                pre = m_from + '.'
                if pre.startswith('.'):
                    # resolve relative import against the containing package
                    pre = dot_name.rsplit('.', 1)[0] + pre
                importlist = m_from_import or m_from_import2
            else:
                pre = ''
                importlist = m_import
            for imp in importlist.split('#', 1)[0].split(','):
                full_imp = pre + imp.strip().split(' as ', 1)[0]
                file_imports.add(full_imp)
                import_deps.add((dot_name, full_imp))
        files_imports[fn] = file_imports

    # dump out all deps for debugging and analysis
    with open('deps.txt', 'w') as f:
        for fn, file_imports in sorted(files_imports.items()):
            for file_import in sorted(file_imports):
                if file_import.split('.', 1)[0] in ignored_modules:
                    continue
                f.write('%s: %s\n' % (fn, file_import))

    # find leafs that haven't been ignored - they are the important external dependencies and shown in the bottom row
    only_imported = set(
        set(normalize(b) for a, b in import_deps) -
        set(normalize(a) for a, b in import_deps) -
        set([None, 'kallithea'])
    )

    normalized_dep_edges = set()
    for dot_name, full_imp in import_deps:
        a = normalize(dot_name)
        b = normalize(full_imp)
        if a is None or b is None or a == b:
            continue
        normalized_dep_edges.add((a, b))
        #print((dot_name, full_imp, a, b))
    normalized_dep_edges.update(extra_edges)

    # every declared module must appear in at least one edge
    unseen_shown_modules = shown_modules.difference(a for a, b in normalized_dep_edges).difference(b for a, b in normalized_dep_edges)
    assert not unseen_shown_modules, unseen_shown_modules

    with open('deps.dot', 'w') as f:
        f.write('digraph {\n')
        f.write('subgraph { rank = same; %s}\n' % ''.join('"%s"; ' % s for s in sorted(top_modules)))
        f.write('subgraph { rank = same; %s}\n' % ''.join('"%s"; ' % s for s in sorted(only_imported)))
        for a, b in sorted(normalized_dep_edges):
            f.write('  "%s" -> "%s"%s\n' % (a, b, ' [color=red]' if (a, b) in known_violations else ' [color=green]' if (a, b) in extra_edges else ''))
        f.write('}\n')

    # verify dependencies by untangling dependency chain bottom-up:
    todo = set(normalized_dep_edges)
    for x in known_violations:
        todo.remove(x)

    while todo:
        depending = set(a for a, b in todo)
        depended = set(b for a, b in todo)
        # leaves: depended upon but not depending on anything - peel them off
        drop = depended - depending
        if not drop:
            print('ERROR: cycles:', len(todo))
            for x in sorted(todo):
                print('%s,' % (x,))
            raise SystemExit(1)
        #for do_b in sorted(drop):
        #    print('Picking', do_b, '- unblocks:', ' '.join(a for a, b in sorted((todo)) if b == do_b))
        todo = set((a, b) for a, b in todo if b in depending)
        #print()
292 291
# Script entry point: file names are passed on the command line.
if __name__ == '__main__':
    main(sys.argv[1:])
General Comments 0
You need to be logged in to leave comments. Login now