upstream/kallithea Commit - r7942:9203621c

vcs: always return bytes from node.content...

Mads Kiilerich -

r7942:9203621c default

parent child

kallithea/controllers/admin/gists.py

0 +1 -1

                         log.error(traceback.format_exc())
                         raise HTTPNotFound()
                     if format == 'raw':
-                        content = '\n\n'.join([f.content for f in c.files if (f_path is None or safe_unicode(f.path) == f_path)])
+                        content = '\n\n'.join([safe_unicode(f.content) for f in c.files if (f_path is None or safe_unicode(f.path) == f_path)])
                         response.content_type = 'text/plain'
                         return content
                     return render('admin/gists/show.html')

kallithea/controllers/compare.py

0 +1 -1

                                                   ignore_whitespace=ignore_whitespace,
                                                   context=line_context)
-                    diff_processor = diffs.DiffProcessor(raw_diff or '', diff_limit=diff_limit)
+                    diff_processor = diffs.DiffProcessor(raw_diff, diff_limit=diff_limit)
                     c.limited_diff = diff_processor.limited_diff
                     c.file_diff_data = []
                     c.lines_added = 0

kallithea/controllers/feed.py

0 +1 -1

                     desc_msg.extend(changes)
                     if str2bool(CONFIG.get('rss_include_diff', False)):
                         desc_msg.append('\n\n')
-                        desc_msg.append(raw_diff)
+                        desc_msg.append(safe_unicode(raw_diff))
                     desc_msg.append('</pre>')
                     return [safe_unicode(chunk) for chunk in desc_msg]

kallithea/controllers/files.py

0 +2 -3

             from kallithea.lib.base import BaseRepoController, jsonify, render
             from kallithea.lib.exceptions import NonRelativePathError
             from kallithea.lib.utils import action_logger
-            from kallithea.lib.utils2 import convert_line_endings, detect_mode, safe_int, safe_str, str2bool
+            from kallithea.lib.utils2 import convert_line_endings, detect_mode, safe_int, safe_str, safe_unicode, str2bool
             from kallithea.lib.vcs.backends.base import EmptyChangeset
             from kallithea.lib.vcs.conf import settings
             from kallithea.lib.vcs.exceptions import (
                     c.f_path = f_path
                     if r_post:
+                        old_content = safe_unicode(c.file.content)
-                        old_content = c.file.content
                         sl = old_content.splitlines(1)
                         first_line = sl[0] if sl else ''
                         # modes:  0 - Unix, 1 - Mac, 2 - DOS

kallithea/controllers/pullrequests.py

0 +1 -1

                                                   ignore_whitespace=ignore_whitespace, context=line_context)
                     except ChangesetDoesNotExistError:
                         raw_diff = _("The diff can't be shown - the PR revisions could not be found.")
-                    diff_processor = diffs.DiffProcessor(raw_diff or '', diff_limit=diff_limit)
+                    diff_processor = diffs.DiffProcessor(raw_diff, diff_limit=diff_limit)
                     c.limited_diff = diff_processor.limited_diff
                     c.file_diff_data = []
                     c.lines_added = 0

kallithea/controllers/summary.py

0 +2 -2

             from kallithea.lib.compat import json
             from kallithea.lib.markup_renderer import MarkupRenderer
             from kallithea.lib.page import Page
-            from kallithea.lib.utils2 import safe_int
+            from kallithea.lib.utils2 import safe_int, safe_unicode
             from kallithea.lib.vcs.backends.base import EmptyChangeset
             from kallithea.lib.vcs.exceptions import ChangesetError, EmptyRepositoryError, NodeDoesNotExistError
             from kallithea.lib.vcs.nodes import FileNode
                                     readme_file = f
                                     log.debug('Found README file `%s` rendering...',
                                               readme_file)
-                                    readme_data = renderer.render(readme.content,
+                                    readme_data = renderer.render(safe_unicode(readme.content),
                                                                   filename=f)
                                     break
                                 except NodeDoesNotExistError:

kallithea/lib/annotate.py

0 +2 -1

             from kallithea.lib.vcs.exceptions import VCSError
             from kallithea.lib.vcs.nodes import FileNode
+            from kallithea.lib.vcs.utils import safe_unicode
             def annotate_highlight(filenode, annotate_from_changeset_func=None,
                     headers=headers,
                     annotate_from_changeset_func=annotate_from_changeset_func, **options)
                 lexer = get_custom_lexer(filenode.extension) or filenode.lexer
-                highlighted = highlight(filenode.content, lexer, formatter)
+                highlighted = highlight(safe_unicode(filenode.content), lexer, formatter)
                 return highlighted

kallithea/lib/diffs.py

0 +6 -3

                         based on that parameter cut off will be triggered, set to None
                         to show full diff
                     """
-                    if not isinstance(diff, basestring):
+                    if not isinstance(diff, bytes):
-                        raise Exception('Diff must be a basestring got %s instead' % type(diff))
+                        raise Exception('Diff must be bytes - got %s' % type(diff))
                     self._diff = diff
                     self.adds = 0
             """, re.VERBOSE | re.MULTILINE)
+            _header_next_check = re.compile(br'''(?!@)(?!literal )(?!delta )''')
             def _get_header(vcs, diff_chunk):
                 """
                 Parses a Git diff for a single file (header and chunks) and returns a tuple with:
                     raise Exception('diff not recognized as valid %s diff' % vcs)
                 meta_info = match.groupdict()
                 rest = diff_chunk[match.end():]
-                if rest and not rest.startswith('@') and not rest.startswith('literal ') and not rest.startswith('delta '):
+                if rest and _header_next_check.match(rest):
                     raise Exception('cannot parse %s diff header: %r followed by %r' % (vcs, diff_chunk[:match.end()], rest[:1000]))
                 diff_lines = (_escaper(m.group(0)) for m in re.finditer(r'.*\n|.+$', rest)) # don't split on \r as str.splitlines do
                 return meta_info, diff_lines

kallithea/lib/helpers.py

0 +1 -1

                 """
                 lexer = get_custom_lexer(filenode.extension) or filenode.lexer
                 return literal(markup_whitespace(
-                    code_highlight(filenode.content, lexer, CodeHtmlFormatter(**kwargs))))
+                    code_highlight(safe_unicode(filenode.content), lexer, CodeHtmlFormatter(**kwargs))))
             def pygmentize_annotation(repo_name, filenode, **kwargs):

kallithea/lib/indexers/daemon.py

0 +3 -2

                     indexed = indexed_w_content = 0
                     if self.is_indexable_node(node):
-                        u_content = node.content
+                        bytes_content = node.content
-                        if not isinstance(u_content, unicode):
+                        if b'\0' in bytes_content:
                             log.warning('    >> %s - no text content', path)
                             u_content = u''
                         else:
                             log.debug('    >> %s', path)
+                            u_content = safe_unicode(bytes_content)
                             indexed_w_content += 1
                     else:

kallithea/lib/vcs/backends/git/inmemory.py

0 +1 -5

                         # for dirnames (in reverse order) [this only applies for nodes from added]
                         new_trees = []
-                        if not node.is_binary:
+                        blob = objects.Blob.from_string(node.content)
-                            content = node.content.encode(ENCODING)
-                        else:
-                            content = node.content
-                        blob = objects.Blob.from_string(content)
                         node_path = safe_bytes(node.name)
                         if dirnames:

kallithea/lib/vcs/backends/hg/inmemory.py

0 +2 -4

                         for node in self.added:
                             if node.path == path:
                                 return memfilectx(_repo, memctx, path=node.path,
-                                    data=(node.content.encode('utf-8')
+                                    data=node.content,
-                                          if not node.is_binary else node.content),
                                     islink=False,
                                     isexec=node.is_executable,
                                     copysource=False)
                         for node in self.changed:
                             if node.path == path:
                                 return memfilectx(_repo, memctx, path=node.path,
-                                    data=(node.content.encode('utf-8')
+                                    data=node.content,
-                                          if not node.is_binary else node.content),
                                     islink=False,
                                     isexec=node.is_executable,
                                     copysource=False)

kallithea/lib/vcs/nodes.py

0 +12 -17

             from kallithea.lib.vcs.backends.base import EmptyChangeset
             from kallithea.lib.vcs.exceptions import NodeError, RemovedFileNodeError
-            from kallithea.lib.vcs.utils import safe_str, safe_unicode
+            from kallithea.lib.vcs.utils import safe_bytes, safe_str, safe_unicode
             from kallithea.lib.vcs.utils.lazy import LazyProperty
                         raise NodeError("Cannot use both content and changeset")
                     super(FileNode, self).__init__(path, kind=NodeKind.FILE)
                     self.changeset = changeset
+                    if not isinstance(content, bytes) and content is not None:
+                        # File content is one thing that inherently must be bytes ... but
+                        # VCS module tries to be "user friendly" and support unicode ...
+                        content = safe_bytes(content)
                     self._content = content
                     self._mode = mode or 0o100644
                         mode = self._mode
                     return mode
-                def _get_content(self):
+                @property
+                def content(self):
+                    """
+                    Returns lazily byte content of the FileNode.
+                    """
                     if self.changeset:
                         content = self.changeset.get_file_content(self.path)
                     else:
                         content = self._content
                     return content
-                @property
-                def content(self):
-                    """
-                    Returns lazily content of the FileNode. If possible, would try to
-                    decode content from UTF-8.
-                    """
-                    content = self._get_content()
-                    if bool(content and '\0' in content):
-                        return content
-                    return safe_unicode(content)
                 @LazyProperty
                 def size(self):
                     if self.changeset:
                     """
                     from pygments import lexers
                     try:
-                        lexer = lexers.guess_lexer_for_filename(self.name, self.content, stripnl=False)
+                        lexer = lexers.guess_lexer_for_filename(self.name, safe_unicode(self.content), stripnl=False)
                     except lexers.ClassNotFound:
                         lexer = lexers.TextLexer(stripnl=False)
                     # returns first alias
                     """
                     Returns True if file has binary content.
                     """
-                    _bin = '\0' in self._get_content()
+                    return b'\0' in self.content
-                    return _bin
                 def is_browser_compatible_image(self):
                     return self.mimetype in [

kallithea/lib/vcs/utils/annotate.py

0 +2 -3

             from kallithea.lib.vcs.exceptions import VCSError
             from kallithea.lib.vcs.nodes import FileNode
+            from kallithea.lib.vcs.utils import safe_unicode
             def annotate_highlight(filenode, annotate_from_changeset_func=None,
                 formatter = AnnotateHtmlFormatter(filenode=filenode, order=order,
                     headers=headers,
                     annotate_from_changeset_func=annotate_from_changeset_func, **options)
-                lexer = filenode.lexer
+                return highlight(safe_unicode(filenode.content), filenode.lexer, formatter)
-                highlighted = highlight(filenode.content, lexer, formatter)
-                return highlighted
             class AnnotateHtmlFormatter(HtmlFormatter):

kallithea/templates/admin/gists/edit.html

0 +1 -1

                                 </div>
                                 <div class="panel-body no-padding">
                                     <div id="editor_container">
-                                        <textarea id="editor_${h.FID('f',file.path)}" name="contents" style="display:none">${file.content}</textarea>
+                                        <textarea id="editor_${h.FID('f',file.path)}" name="contents" style="display:none">${safe_unicode(file.content)}</textarea>
                                     </div>
                                 </div>
                             </div>

kallithea/templates/files/files_edit.html

0 +1 -1

                                 </span>
                           </div>
                           <div class="panel-body no-padding">
-                            <textarea id="editor" name="content" style="display:none">${h.escape(c.file.content)|n}</textarea>
+                            <textarea id="editor" name="content" style="display:none">${h.escape(safe_unicode(c.file.content))|n}</textarea>
                           </div>
                         </div>
                         <div>

kallithea/tests/vcs/test_git.py

0 +2 -2

                     for cs in self.repo:
                         assert isinstance(cs.author, unicode)
-                def test_repo_files_content_is_unicode(self):
+                def test_repo_files_content_is_bytes(self):
                     changeset = self.repo.get_changeset()
                     for node in changeset.get_node('/'):
                         if node.is_file():
-                            assert isinstance(node.content, unicode)
+                            assert isinstance(node.content, bytes)
                 def test_wrong_path(self):
                     # There is 'setup.py' in the root dir but not there:

kallithea/tests/vcs/test_hg.py

0 +2 -2

                     for cm in self.repo:
                         assert isinstance(cm.author, unicode)
-                def test_repo_files_content_is_unicode(self):
+                def test_repo_files_content_is_bytes(self):
                     test_changeset = self.repo.get_changeset(100)
                     for node in test_changeset.get_node('/'):
                         if node.is_file():
-                            assert isinstance(node.content, unicode)
+                            assert isinstance(node.content, bytes)
                 def test_wrong_path(self):
                     # There is 'setup.py' in the root dir but not there:

General Comments 0

Write
Preview

You need to be logged in to leave comments. Log in now

No TODOs yet

	Site-wide shortcuts
/	Use quick search box
g h	Go to home page
g g	Go to my private gists page
g G	Go to my public gists page
g 0-9	Go to bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Go to summary page
g c	Go to changelog page
g f	Go to files page
g F	Go to files page with file search activated
g p	Go to pull requests page
g o	Go to repository settings
g O	Go to repository access permissions settings
t s	Toggle sidebar on some pages