# HG changeset patch # User Mads Kiilerich # Date 2020-11-13 00:04:30 # Node ID 52816813cbecd4ac6bb1d99e10c1a7fadacbc734 # Parent f8971422795e0476dda0aaa5ece9e46e092968fd docs: describe, visualize, and verify internal code structure and layering Try to describe something that isn't entirely there yet. deps.py will help track and minimize violations through checks and visualization in deps.svg . diff --git a/.hgignore b/.hgignore --- a/.hgignore +++ b/.hgignore @@ -53,3 +53,6 @@ syntax: regexp ^\.pytest_cache$ ^venv$ /__pycache__$ +^deps\.dot$ +^deps\.svg$ +^deps\.txt$ diff --git a/docs/contributing.rst b/docs/contributing.rst --- a/docs/contributing.rst +++ b/docs/contributing.rst @@ -72,6 +72,94 @@ review and inclusion, via the mailing li .. _contributing-tests: +Internal dependencies +--------------------- + +We try to keep the code base clean and modular and avoid circular dependencies. +Code should only invoke code in layers below itself. + +Imports should import whole modules ``from`` their parent module, perhaps +``as`` a shortened name. Avoid imports ``from`` modules. + +To avoid cycles and partially initialized modules, ``__init__.py`` should *not* +contain any non-trivial imports. The top level of a module should *not* be a +facade for the module functionality. + +Common code for a module is often in ``base.py``. + +The important part of the dependency graph is approximately linear. In the +following list, modules may only depend on modules below them: + +``tests`` + Just get the job done - anything goes. + +``bin/`` & ``config/`` & ``alembic/`` + The main entry points, defined in ``setup.py``. Note: The TurboGears template + use ``config`` for the high WSGI application - this is not for low level + configuration. + +``controllers/`` + The top level web application, with TurboGears using the ``root`` controller + as entry point, and ``routing`` dispatching to other controllers. + +``templates/**.html`` + The "view", rendering to HTML. Invoked by controllers which can pass them + anything from lower layers - especially ``helpers`` available as ``h`` will + cut through all layers, and ``c`` gives access to global variables. + +``lib/helpers.py`` + High level helpers, exposing everything to templates as ``h``. It depends on + everything and has a huge dependency chain, so it should not be used for + anything else. TODO. + +``controlles/base.py`` + The base class of controllers, with lots of model knowledge. + +``lib/auth.py`` + All things related to authentication. TODO. + +``lib/utils.py`` + High level utils with lots of model knowledge. TODO. + +``lib/hooks.py`` + Hooks into "everything" to give centralized logging to database, cache + invalidation, and extension handling. TODO. + +``model/`` + Convenience business logic wrappers around database models. + +``model/db.py`` + Defines the database schema and provides some additional logic. + +``model/scm.py`` + All things related to anything. TODO. + +SQLAlchemy + Database session and transaction in thread-local variables. + +``lib/utils2.py`` + Low level utils specific to Kallithea. + +``lib/webutils.py`` + Low level generic utils with awareness of the TurboGears environment. + +TurboGears + Request, response and state like i18n gettext in thread-local variables. + External dependency with global state - usage should be minimized. + +``lib/vcs/`` + Previously an independent library. No awareness of web, database, or state. + +``lib/*`` + Various "pure" functionality not depending on anything else. + +``__init__`` + Very basic Kallithea constants - some of them are set very early based on ``.ini``. + +This is not exactly how it is right now, but we aim for something like that. +Especially the areas marked as TODO have some problems that need untangling. + + Running tests ------------- diff --git a/scripts/deps.py b/scripts/deps.py new file mode 100755 --- /dev/null +++ b/scripts/deps.py @@ -0,0 +1,295 @@ +#!/usr/bin/env python3 + + +import re +import sys + + +ignored_modules = set(''' +argparse +base64 +bcrypt +binascii +bleach +calendar +celery +celery +chardet +click +collections +configparser +copy +csv +ctypes +datetime +dateutil +decimal +decorator +difflib +distutils +docutils +email +errno +fileinput +functools +getpass +grp +hashlib +hmac +html +http +imp +importlib +inspect +io +ipaddr +IPython +isapi_wsgi +itertools +json +kajiki +ldap +logging +mako +markdown +mimetypes +mock +msvcrt +multiprocessing +operator +os +paginate +paginate_sqlalchemy +pam +paste +pkg_resources +platform +posixpath +pprint +pwd +pyflakes +pytest +pytest_localserver +random +re +routes +setuptools +shlex +shutil +smtplib +socket +ssl +stat +string +struct +subprocess +sys +tarfile +tempfile +textwrap +tgext +threading +time +traceback +traitlets +types +urllib +urlobject +uuid +warnings +webhelpers2 +webob +webtest +whoosh +win32traceutil +zipfile +'''.split()) + +top_modules = set(''' +kallithea.alembic +kallithea.bin +kallithea.config +kallithea.controllers +kallithea.templates.py +scripts +'''.split()) + +bottom_external_modules = set(''' +tg +mercurial +sqlalchemy +alembic +formencode +pygments +dulwich +beaker +psycopg2 +docs +setup +conftest +'''.split()) + +normal_modules = set(''' +kallithea +kallithea.lib.celerylib.tasks +kallithea.lib +kallithea.lib.auth +kallithea.lib.auth_modules +kallithea.lib.base +kallithea.lib.celerylib +kallithea.lib.db_manage +kallithea.lib.helpers +kallithea.lib.hooks +kallithea.lib.indexers +kallithea.lib.utils +kallithea.lib.utils2 +kallithea.lib.vcs +kallithea.lib.webutils +kallithea.model +kallithea.model.scm +kallithea.templates.py +'''.split()) + +shown_modules = normal_modules | top_modules + +# break the chains somehow - this is a cleanup TODO list +known_violations = [ +('kallithea.lib.auth_modules', 'kallithea.lib.auth'), # needs base&facade +('kallithea.lib.utils', 'kallithea.model'), # clean up utils +('kallithea.lib.utils', 'kallithea.model.db'), +('kallithea.lib.utils', 'kallithea.model.scm'), +('kallithea.lib.celerylib.tasks', 'kallithea.lib.helpers'), +('kallithea.lib.celerylib.tasks', 'kallithea.lib.hooks'), +('kallithea.lib.celerylib.tasks', 'kallithea.lib.indexers'), +('kallithea.lib.celerylib.tasks', 'kallithea.model'), +('kallithea.model', 'kallithea.lib.auth'), # auth.HasXXX +('kallithea.model', 'kallithea.lib.auth_modules'), # validators +('kallithea.model', 'kallithea.lib.helpers'), +('kallithea.model', 'kallithea.lib.hooks'), # clean up hooks +('kallithea.model', 'kallithea.model.scm'), +('kallithea.model.scm', 'kallithea.lib.hooks'), +] + +extra_edges = [ +('kallithea.config', 'kallithea.controllers'), # through TG +('kallithea.lib.auth', 'kallithea.lib.auth_modules'), # custom loader +] + + +def normalize(s): + """Given a string with dot path, return the string it should be shown as.""" + parts = s.replace('.__init__', '').split('.') + short_2 = '.'.join(parts[:2]) + short_3 = '.'.join(parts[:3]) + short_4 = '.'.join(parts[:4]) + if parts[0] in ['scripts', 'contributor_data', 'i18n_utils']: + return 'scripts' + if short_3 == 'kallithea.model.meta': + return 'kallithea.model.db' + if parts[:4] == ['kallithea', 'lib', 'vcs', 'ssh']: + return 'kallithea.lib.vcs.ssh' + if short_4 in shown_modules: + return short_4 + if short_3 in shown_modules: + return short_3 + if short_2 in shown_modules: + return short_2 + if short_2 == 'kallithea.tests': + return None + if parts[0] in ignored_modules: + return None + assert parts[0] in bottom_external_modules, parts + return parts[0] + + +def main(filenames): + if not filenames or filenames[0].startswith('-'): + print('''\ +Usage: + hg files 'set:!binary()&grep("^#!.*python")' 'set:**.py' | xargs scripts/deps.py + dot -Tsvg deps.dot > deps.svg + ''') + raise SystemExit(1) + + files_imports = dict() # map filenames to its imports + import_deps = set() # set of tuples with module name and its imports + for fn in filenames: + with open(fn) as f: + s = f.read() + + dot_name = (fn[:-3] if fn.endswith('.py') else fn).replace('/', '.') + file_imports = set() + for m in re.finditer(r'^ *(?:from ([^ ]*) import (?:([a-zA-Z].*)|\(([^)]*)\))|import (.*))$', s, re.MULTILINE): + m_from, m_from_import, m_from_import2, m_import = m.groups() + if m_from: + pre = m_from + '.' + if pre.startswith('.'): + pre = dot_name.rsplit('.', 1)[0] + pre + importlist = m_from_import or m_from_import2 + else: + pre = '' + importlist = m_import + for imp in importlist.split('#', 1)[0].split(','): + full_imp = pre + imp.strip().split(' as ', 1)[0] + file_imports.add(full_imp) + import_deps.add((dot_name, full_imp)) + files_imports[fn] = file_imports + + # dump out all deps for debugging and analysis + with open('deps.txt', 'w') as f: + for fn, file_imports in sorted(files_imports.items()): + for file_import in sorted(file_imports): + if file_import.split('.', 1)[0] in ignored_modules: + continue + f.write('%s: %s\n' % (fn, file_import)) + + # find leafs that haven't been ignored - they are the important external dependencies and shown in the bottom row + only_imported = set( + set(normalize(b) for a, b in import_deps) - + set(normalize(a) for a, b in import_deps) - + set([None, 'kallithea']) + ) + + normalized_dep_edges = set() + for dot_name, full_imp in import_deps: + a = normalize(dot_name) + b = normalize(full_imp) + if a is None or b is None or a == b: + continue + normalized_dep_edges.add((a, b)) + #print((dot_name, full_imp, a, b)) + normalized_dep_edges.update(extra_edges) + + unseen_shown_modules = shown_modules.difference(a for a, b in normalized_dep_edges).difference(b for a, b in normalized_dep_edges) + assert not unseen_shown_modules, unseen_shown_modules + + with open('deps.dot', 'w') as f: + f.write('digraph {\n') + f.write('subgraph { rank = same; %s}\n' % ''.join('"%s"; ' % s for s in sorted(top_modules))) + f.write('subgraph { rank = same; %s}\n' % ''.join('"%s"; ' % s for s in sorted(only_imported))) + for a, b in sorted(normalized_dep_edges): + f.write(' "%s" -> "%s"%s\n' % (a, b, ' [color=red]' if (a, b) in known_violations else ' [color=green]' if (a, b) in extra_edges else '')) + f.write('}\n') + + # verify dependencies by untangling dependency chain bottom-up: + todo = set(normalized_dep_edges) + for x in known_violations: + todo.remove(x) + + while todo: + depending = set(a for a, b in todo) + depended = set(b for a, b in todo) + drop = depended - depending + if not drop: + print('ERROR: cycles:', len(todo)) + for x in sorted(todo): + print('%s,' % (x,)) + raise SystemExit(1) + #for do_b in sorted(drop): + # print('Picking', do_b, '- unblocks:', ' '.join(a for a, b in sorted((todo)) if b == do_b)) + todo = set((a, b) for a, b in todo if b in depending) + #print() + + +if __name__ == '__main__': + main(sys.argv[1:]) diff --git a/scripts/run-all-cleanup b/scripts/run-all-cleanup --- a/scripts/run-all-cleanup +++ b/scripts/run-all-cleanup @@ -5,6 +5,9 @@ set -e set -x +hg files 'set:!binary()&grep("^#!.*python")' 'set:**.py' | xargs scripts/deps.py +dot -Tsvg deps.dot > deps.svg + scripts/docs-headings.py scripts/generate-ini.py scripts/whitespacecleanup.sh