hggettext
163 lines
| 5.2 KiB
| text/plain
|
TextLexer
/ i18n / hggettext
Martin Geisler
|
r8542 | #!/usr/bin/env python | ||
# | ||||
# hggettext - carefully extract docstrings for Mercurial | ||||
# | ||||
# Copyright 2009 Matt Mackall <mpm@selenic.com> and others | ||||
# | ||||
# This software may be used and distributed according to the terms of the | ||||
Matt Mackall
|
r10263 | # GNU General Public License version 2 or any later version. | ||
Martin Geisler
|
r8542 | |||
# The normalize function is taken from pygettext which is distributed | ||||
# with Python under the Python License, which is GPL compatible. | ||||
"""Extract docstrings from Mercurial commands.

Compared to pygettext, this script knows about the cmdtable and table
dictionaries used by Mercurial, and will only extract docstrings from
functions mentioned therein.

Use xgettext like normal to extract strings marked as translatable and
join the message catalogs to get the final catalog.
"""
Pulkit Goyal
|
r29171 | from __future__ import absolute_import, print_function | ||
Pulkit Goyal
|
r29170 | |||
import inspect | ||||
import os | ||||
FUJIWARA Katsunori
|
r33816 | import re | ||
Pulkit Goyal
|
r29170 | import sys | ||
Martin Geisler
|
r8542 | |||
def escape(s):
    """Return s with backslash, newline, CR, tab and '"' backslash-escaped.

    The result is suitable for placing between double quotes in a
    .po file.
    """
    # The order is important, the backslash must be escaped first
    # since the other replacements introduce new backslashes
    # themselves.
    replacements = (
        ('\\', '\\\\'),
        ('\n', '\\n'),
        ('\r', '\\r'),
        ('\t', '\\t'),
        ('"', '\\"'),
    )
    for old, new in replacements:
        s = s.replace(old, new)
    return s
def normalize(s):
    """Return s formatted as a quoted string for a .po file.

    This converts the various Python string types into a format that
    is appropriate for .po files, namely much closer to C style.

    A string without newlines becomes a single double-quoted literal.
    A string with newlines becomes an empty literal ("") followed by
    one double-quoted literal per line, each line but the last ending
    in an escaped newline.
    """
    lines = s.split('\n')
    if len(lines) == 1:
        return '"' + escape(s) + '"'

    if not lines[-1]:
        # s ended with a newline: drop the empty trailing piece and
        # re-attach the newline to the last real line, so that it is
        # escaped together with that line.
        del lines[-1]
        lines[-1] = lines[-1] + '\n'
    # Each joined line is terminated by an escaped newline and a
    # closing quote; the next one is opened on a fresh output line.
    lineterm = '\\n"\n"'
    return '""\n"' + lineterm.join([escape(line) for line in lines]) + '"'
def poentry(path, lineno, s):
    """Return a .po entry for string s found at path:lineno.

    The entry consists of a "#: path:lineno" reference comment, a
    msgid line holding the normalized s, and an empty msgstr line.
    """
    return ('#: %s:%d\n' % (path, lineno) +
            'msgid %s\n' % normalize(s) +
            'msgstr ""\n')
FUJIWARA Katsunori
|
# Matches the first doctest prompt (an indented ">>> ") in a docstring.
doctestre = re.compile(r'^ +>>> ', re.MULTILINE)
Martin Geisler
|
r8542 | |||
FUJIWARA Katsunori
|
def offset(src, doc, name, lineno, default):
    """Compute offset or issue a warning on stderr.

    Returns the number of newlines in src that precede the first
    occurrence of doc (truncated before its first doctest prompt).
    If doc cannot be located in src, a warning mentioning name and
    lineno is written to stderr and default is returned instead.
    """
    # remove doctest part, in order to avoid backslash mismatching
    m = doctestre.search(doc)
    if m:
        doc = doc[:m.start()]

    # Backslashes in doc appear doubled in src.
    end = src.find(doc.replace('\\', '\\\\'))
    if end == -1:
        # This can happen if the docstring contains unnecessary escape
        # sequences such as \" in a triple-quoted string. The problem
        # is that \" is turned into " and so doc won't appear in src.
        sys.stderr.write("%s:%d:warning:"
                         " unknown docstr offset, assuming %d lines\n"
                         % (name, lineno, default))
        return default
    else:
        return src.count('\n', 0, end)
def importpath(path):
    """Import a path like foo/bar/baz.py and return the baz module.

    A trailing '.py' and a trailing '/__init__' are stripped, so
    foo/bar/__init__.py yields the foo.bar package.
    """
    if path.endswith('.py'):
        path = path[:-3]
    if path.endswith('/__init__'):
        path = path[:-9]
    path = path.replace('/', '.')
    # __import__('a.b.c') returns the top-level package 'a', so walk
    # down the remaining components to reach the requested module.
    mod = __import__(path)
    for comp in path.split('.')[1:]:
        mod = getattr(mod, comp)
    return mod
def docstrings(path):
    """Extract docstrings from path.

    This respects the Mercurial cmdtable/table convention and will
    only extract docstrings from functions mentioned in these tables.

    The module docstring (for paths outside 'mercurial/') and the
    docstring of every function listed in the module's i18nfunctions,
    cmdtable or table are printed to stdout as .po entries.
    """
    mod = importpath(path)
    if not path.startswith('mercurial/') and mod.__doc__:
        with open(path) as fobj:
            src = fobj.read()
        lineno = 1 + offset(src, mod.__doc__, path, 1, 7)
        print(poentry(path, lineno, mod.__doc__))

    # Each entry pairs a function with a flag telling whether trailing
    # whitespace must be stripped from its docstring before output.
    functions = [(f, True) for f in getattr(mod, 'i18nfunctions', [])]

    cmdtable = getattr(mod, 'cmdtable', {})
    if not cmdtable:
        # Maybe we are processing mercurial.commands?
        cmdtable = getattr(mod, 'table', {})
    # values() rather than itervalues(): the latter only exists on
    # Python 2 dictionaries.
    functions.extend((c[0], False) for c in cmdtable.values())

    for func, rstrip in functions:
        if not func.__doc__:
            continue
        docobj = func  # this might be a proxy to provide formatted doc
        func = getattr(func, '_origfunc', func)
        funcmod = inspect.getmodule(func)
        extra = ''
        if funcmod.__package__ == funcmod.__name__:
            # The function lives in a package's __init__.py.
            extra = '/__init__'
        actualpath = '%s%s.py' % (funcmod.__name__.replace('.', '/'), extra)

        # Fetch the source once; getsource() would just join these lines.
        srclines, lineno = inspect.getsourcelines(func)
        src = ''.join(srclines)
        doc = docobj.__doc__
        origdoc = getattr(docobj, '_origdoc', '')
        if rstrip:
            doc = doc.rstrip()
            origdoc = origdoc.rstrip()
        # Locate the docstring via _origdoc when the proxy provides a
        # non-empty one, otherwise via the docstring itself.
        lineno += offset(src, origdoc or doc, actualpath, lineno, 1)
        print(poentry(actualpath, lineno, doc))
Martin Geisler
|
r8542 | |||
Martin Geisler
|
def rawtext(path):
    """Print the whole content of the file at path as one .po entry.

    The entry is attributed to line 1 of path.
    """
    with open(path) as f:
        src = f.read()
    print(poentry(path, 1, src))
Martin Geisler
|
r9539 | |||
Martin Geisler
|
if __name__ == "__main__":
    # It is very important that we import the Mercurial modules from
    # the source tree where hggettext is executed. Otherwise we might
    # accidentally import and extract strings from a Mercurial
    # installation mentioned in PYTHONPATH.
    sys.path.insert(0, os.getcwd())
    from mercurial import demandimport
    demandimport.enable()
    # '.txt' arguments are emitted verbatim as a single entry; every
    # other argument is imported as a module and scanned for docstrings.
    for path in sys.argv[1:]:
        if path.endswith('.txt'):
            rawtext(path)
        else:
            docstrings(path)