diff --git a/kallithea/controllers/admin/gists.py b/kallithea/controllers/admin/gists.py --- a/kallithea/controllers/admin/gists.py +++ b/kallithea/controllers/admin/gists.py @@ -182,7 +182,7 @@ class GistsController(BaseController): log.error(traceback.format_exc()) raise HTTPNotFound() if format == 'raw': - content = '\n\n'.join([f.content for f in c.files if (f_path is None or safe_unicode(f.path) == f_path)]) + content = '\n\n'.join([safe_unicode(f.content) for f in c.files if (f_path is None or safe_unicode(f.path) == f_path)]) response.content_type = 'text/plain' return content return render('admin/gists/show.html') diff --git a/kallithea/controllers/compare.py b/kallithea/controllers/compare.py --- a/kallithea/controllers/compare.py +++ b/kallithea/controllers/compare.py @@ -272,7 +272,7 @@ class CompareController(BaseRepoControll ignore_whitespace=ignore_whitespace, context=line_context) - diff_processor = diffs.DiffProcessor(raw_diff or '', diff_limit=diff_limit) + diff_processor = diffs.DiffProcessor(raw_diff, diff_limit=diff_limit) c.limited_diff = diff_processor.limited_diff c.file_diff_data = [] c.lines_added = 0 diff --git a/kallithea/controllers/feed.py b/kallithea/controllers/feed.py --- a/kallithea/controllers/feed.py +++ b/kallithea/controllers/feed.py @@ -94,7 +94,7 @@ class FeedController(BaseRepoController) desc_msg.extend(changes) if str2bool(CONFIG.get('rss_include_diff', False)): desc_msg.append('\n\n') - desc_msg.append(raw_diff) + desc_msg.append(safe_unicode(raw_diff)) desc_msg.append('') return [safe_unicode(chunk) for chunk in desc_msg] diff --git a/kallithea/controllers/files.py b/kallithea/controllers/files.py --- a/kallithea/controllers/files.py +++ b/kallithea/controllers/files.py @@ -46,7 +46,7 @@ from kallithea.lib.auth import HasRepoPe from kallithea.lib.base import BaseRepoController, jsonify, render from kallithea.lib.exceptions import NonRelativePathError from kallithea.lib.utils import action_logger -from kallithea.lib.utils2 import convert_line_endings, detect_mode, safe_int, safe_str, str2bool +from kallithea.lib.utils2 import convert_line_endings, detect_mode, safe_int, safe_str, safe_unicode, str2bool from kallithea.lib.vcs.backends.base import EmptyChangeset from kallithea.lib.vcs.conf import settings from kallithea.lib.vcs.exceptions import ( @@ -365,8 +365,7 @@ class FilesController(BaseRepoController c.f_path = f_path if r_post: - - old_content = c.file.content + old_content = safe_unicode(c.file.content) sl = old_content.splitlines(1) first_line = sl[0] if sl else '' # modes: 0 - Unix, 1 - Mac, 2 - DOS diff --git a/kallithea/controllers/pullrequests.py b/kallithea/controllers/pullrequests.py --- a/kallithea/controllers/pullrequests.py +++ b/kallithea/controllers/pullrequests.py @@ -591,7 +591,7 @@ class PullrequestsController(BaseRepoCon ignore_whitespace=ignore_whitespace, context=line_context) except ChangesetDoesNotExistError: raw_diff = _("The diff can't be shown - the PR revisions could not be found.") - diff_processor = diffs.DiffProcessor(raw_diff or '', diff_limit=diff_limit) + diff_processor = diffs.DiffProcessor(raw_diff, diff_limit=diff_limit) c.limited_diff = diff_processor.limited_diff c.file_diff_data = [] c.lines_added = 0 diff --git a/kallithea/controllers/summary.py b/kallithea/controllers/summary.py --- a/kallithea/controllers/summary.py +++ b/kallithea/controllers/summary.py @@ -46,7 +46,7 @@ from kallithea.lib.celerylib.tasks impor from kallithea.lib.compat import json from kallithea.lib.markup_renderer import MarkupRenderer from kallithea.lib.page import Page -from kallithea.lib.utils2 import safe_int +from kallithea.lib.utils2 import safe_int, safe_unicode from kallithea.lib.vcs.backends.base import EmptyChangeset from kallithea.lib.vcs.exceptions import ChangesetError, EmptyRepositoryError, NodeDoesNotExistError from kallithea.lib.vcs.nodes import FileNode @@ -84,7 +84,7 @@ class SummaryController(BaseRepoControll readme_file = f log.debug('Found README file `%s` rendering...', readme_file) - readme_data = renderer.render(readme.content, + readme_data = renderer.render(safe_unicode(readme.content), filename=f) break except NodeDoesNotExistError: diff --git a/kallithea/lib/annotate.py b/kallithea/lib/annotate.py --- a/kallithea/lib/annotate.py +++ b/kallithea/lib/annotate.py @@ -30,6 +30,7 @@ from pygments.formatters import HtmlForm from kallithea.lib.vcs.exceptions import VCSError from kallithea.lib.vcs.nodes import FileNode +from kallithea.lib.vcs.utils import safe_unicode def annotate_highlight(filenode, annotate_from_changeset_func=None, @@ -53,7 +54,7 @@ def annotate_highlight(filenode, annotat headers=headers, annotate_from_changeset_func=annotate_from_changeset_func, **options) lexer = get_custom_lexer(filenode.extension) or filenode.lexer - highlighted = highlight(filenode.content, lexer, formatter) + highlighted = highlight(safe_unicode(filenode.content), lexer, formatter) return highlighted diff --git a/kallithea/lib/diffs.py b/kallithea/lib/diffs.py --- a/kallithea/lib/diffs.py +++ b/kallithea/lib/diffs.py @@ -289,8 +289,8 @@ class DiffProcessor(object): based on that parameter cut off will be triggered, set to None to show full diff """ - if not isinstance(diff, basestring): - raise Exception('Diff must be a basestring got %s instead' % type(diff)) + if not isinstance(diff, bytes): + raise Exception('Diff must be bytes - got %s' % type(diff)) self._diff = diff self.adds = 0 @@ -516,6 +516,9 @@ def _escaper(string): """, re.VERBOSE | re.MULTILINE) +_header_next_check = re.compile(br'''(?!@)(?!literal )(?!delta )''') + + def _get_header(vcs, diff_chunk): """ Parses a Git diff for a single file (header and chunks) and returns a tuple with: @@ -537,7 +540,7 @@ def _get_header(vcs, diff_chunk): raise Exception('diff not recognized as valid %s diff' % vcs) meta_info = match.groupdict() rest = diff_chunk[match.end():] - if rest and not rest.startswith('@') and not rest.startswith('literal ') and not rest.startswith('delta '): + if rest and _header_next_check.match(rest): raise Exception('cannot parse %s diff header: %r followed by %r' % (vcs, diff_chunk[:match.end()], rest[:1000])) diff_lines = (_escaper(m.group(0)) for m in re.finditer(r'.*\n|.+$', rest)) # don't split on \r as str.splitlines do return meta_info, diff_lines diff --git a/kallithea/lib/helpers.py b/kallithea/lib/helpers.py --- a/kallithea/lib/helpers.py +++ b/kallithea/lib/helpers.py @@ -330,7 +330,7 @@ def pygmentize(filenode, **kwargs): """ lexer = get_custom_lexer(filenode.extension) or filenode.lexer return literal(markup_whitespace( - code_highlight(filenode.content, lexer, CodeHtmlFormatter(**kwargs)))) + code_highlight(safe_unicode(filenode.content), lexer, CodeHtmlFormatter(**kwargs)))) def pygmentize_annotation(repo_name, filenode, **kwargs): diff --git a/kallithea/lib/indexers/daemon.py b/kallithea/lib/indexers/daemon.py --- a/kallithea/lib/indexers/daemon.py +++ b/kallithea/lib/indexers/daemon.py @@ -182,12 +182,13 @@ class WhooshIndexingDaemon(object): indexed = indexed_w_content = 0 if self.is_indexable_node(node): - u_content = node.content - if not isinstance(u_content, unicode): + bytes_content = node.content + if b'\0' in bytes_content: log.warning(' >> %s - no text content', path) u_content = u'' else: log.debug(' >> %s', path) + u_content = safe_unicode(bytes_content) indexed_w_content += 1 else: diff --git a/kallithea/lib/vcs/backends/git/inmemory.py b/kallithea/lib/vcs/backends/git/inmemory.py --- a/kallithea/lib/vcs/backends/git/inmemory.py +++ b/kallithea/lib/vcs/backends/git/inmemory.py @@ -68,11 +68,7 @@ class GitInMemoryChangeset(BaseInMemoryC # for dirnames (in reverse order) [this only applies for nodes from added] new_trees = [] - if not node.is_binary: - content = node.content.encode(ENCODING) - else: - content = node.content - blob = objects.Blob.from_string(content) + blob = objects.Blob.from_string(node.content) node_path = safe_bytes(node.name) if dirnames: diff --git a/kallithea/lib/vcs/backends/hg/inmemory.py b/kallithea/lib/vcs/backends/hg/inmemory.py --- a/kallithea/lib/vcs/backends/hg/inmemory.py +++ b/kallithea/lib/vcs/backends/hg/inmemory.py @@ -52,8 +52,7 @@ class MercurialInMemoryChangeset(BaseInM for node in self.added: if node.path == path: return memfilectx(_repo, memctx, path=node.path, - data=(node.content.encode('utf-8') - if not node.is_binary else node.content), + data=node.content, islink=False, isexec=node.is_executable, copysource=False) @@ -62,8 +61,7 @@ class MercurialInMemoryChangeset(BaseInM for node in self.changed: if node.path == path: return memfilectx(_repo, memctx, path=node.path, - data=(node.content.encode('utf-8') - if not node.is_binary else node.content), + data=node.content, islink=False, isexec=node.is_executable, copysource=False) diff --git a/kallithea/lib/vcs/nodes.py b/kallithea/lib/vcs/nodes.py --- a/kallithea/lib/vcs/nodes.py +++ b/kallithea/lib/vcs/nodes.py @@ -16,7 +16,7 @@ import stat from kallithea.lib.vcs.backends.base import EmptyChangeset from kallithea.lib.vcs.exceptions import NodeError, RemovedFileNodeError -from kallithea.lib.vcs.utils import safe_str, safe_unicode +from kallithea.lib.vcs.utils import safe_bytes, safe_str, safe_unicode from kallithea.lib.vcs.utils.lazy import LazyProperty @@ -263,6 +263,10 @@ class FileNode(Node): raise NodeError("Cannot use both content and changeset") super(FileNode, self).__init__(path, kind=NodeKind.FILE) self.changeset = changeset + if not isinstance(content, bytes) and content is not None: + # File content is one thing that inherently must be bytes ... but + # VCS module tries to be "user friendly" and support unicode ... + content = safe_bytes(content) self._content = content self._mode = mode or 0o100644 @@ -278,25 +282,17 @@ class FileNode(Node): mode = self._mode return mode - def _get_content(self): + @property + def content(self): + """ + Returns lazily byte content of the FileNode. + """ if self.changeset: content = self.changeset.get_file_content(self.path) else: content = self._content return content - @property - def content(self): - """ - Returns lazily content of the FileNode. If possible, would try to - decode content from UTF-8. - """ - content = self._get_content() - - if bool(content and '\0' in content): - return content - return safe_unicode(content) - @LazyProperty def size(self): if self.changeset: @@ -366,7 +362,7 @@ class FileNode(Node): """ from pygments import lexers try: - lexer = lexers.guess_lexer_for_filename(self.name, self.content, stripnl=False) + lexer = lexers.guess_lexer_for_filename(self.name, safe_unicode(self.content), stripnl=False) except lexers.ClassNotFound: lexer = lexers.TextLexer(stripnl=False) # returns first alias @@ -414,8 +410,7 @@ class FileNode(Node): """ Returns True if file has binary content. """ - _bin = '\0' in self._get_content() - return _bin + return b'\0' in self.content def is_browser_compatible_image(self): return self.mimetype in [ diff --git a/kallithea/lib/vcs/utils/annotate.py b/kallithea/lib/vcs/utils/annotate.py --- a/kallithea/lib/vcs/utils/annotate.py +++ b/kallithea/lib/vcs/utils/annotate.py @@ -3,6 +3,7 @@ from pygments.formatters import HtmlForm from kallithea.lib.vcs.exceptions import VCSError from kallithea.lib.vcs.nodes import FileNode +from kallithea.lib.vcs.utils import safe_unicode def annotate_highlight(filenode, annotate_from_changeset_func=None, @@ -24,9 +25,7 @@ def annotate_highlight(filenode, annotat formatter = AnnotateHtmlFormatter(filenode=filenode, order=order, headers=headers, annotate_from_changeset_func=annotate_from_changeset_func, **options) - lexer = filenode.lexer - highlighted = highlight(filenode.content, lexer, formatter) - return highlighted + return highlight(safe_unicode(filenode.content), filenode.lexer, formatter) class AnnotateHtmlFormatter(HtmlFormatter): diff --git a/kallithea/templates/admin/gists/edit.html b/kallithea/templates/admin/gists/edit.html --- a/kallithea/templates/admin/gists/edit.html +++ b/kallithea/templates/admin/gists/edit.html @@ -73,7 +73,7 @@
- +
diff --git a/kallithea/templates/files/files_edit.html b/kallithea/templates/files/files_edit.html --- a/kallithea/templates/files/files_edit.html +++ b/kallithea/templates/files/files_edit.html @@ -59,7 +59,7 @@
- +
diff --git a/kallithea/tests/vcs/test_git.py b/kallithea/tests/vcs/test_git.py --- a/kallithea/tests/vcs/test_git.py +++ b/kallithea/tests/vcs/test_git.py @@ -596,11 +596,11 @@ class TestGitChangeset(object): for cs in self.repo: assert isinstance(cs.author, unicode) - def test_repo_files_content_is_unicode(self): + def test_repo_files_content_is_bytes(self): changeset = self.repo.get_changeset() for node in changeset.get_node('/'): if node.is_file(): - assert isinstance(node.content, unicode) + assert isinstance(node.content, bytes) def test_wrong_path(self): # There is 'setup.py' in the root dir but not there: diff --git a/kallithea/tests/vcs/test_hg.py b/kallithea/tests/vcs/test_hg.py --- a/kallithea/tests/vcs/test_hg.py +++ b/kallithea/tests/vcs/test_hg.py @@ -544,11 +544,11 @@ class TestMercurialChangeset(object): for cm in self.repo: assert isinstance(cm.author, unicode) - def test_repo_files_content_is_unicode(self): + def test_repo_files_content_is_bytes(self): test_changeset = self.repo.get_changeset(100) for node in test_changeset.get_node('/'): if node.is_file(): - assert isinstance(node.content, unicode) + assert isinstance(node.content, bytes) def test_wrong_path(self): # There is 'setup.py' in the root dir but not there: