"""
Utilities for XML generation/parsing.
"""

import six

from xml.sax.saxutils import XMLGenerator, quoteattr
from urllib import quote
from rhodecode.lib.utils import safe_str, safe_unicode


class SimplerXMLGenerator(XMLGenerator):
    """
    XMLGenerator variant that adds a one-call helper for leaf elements and
    writes start-tag attributes in sorted order.
    """

    def addQuickElement(self, name, contents=None, attrs=None):
        """
        Emit a complete element that has no child elements.

        Opens ``name`` with ``attrs`` (an empty mapping when omitted), writes
        ``contents`` as character data unless it is None, then closes the
        element again.
        """
        attrs = {} if attrs is None else attrs
        self.startElement(name, attrs)
        if contents is not None:
            self.characters(contents)
        self.endElement(name)

    def startElement(self, name, attrs):
        """
        Write the opening tag for ``name`` with the given ``attrs`` mapping.

        Each attribute value is run through ``quoteattr`` before being
        written.
        """
        self._write('<' + name)
        # Sorting the items keeps the output stable regardless of the
        # mapping's own iteration order.
        for attr_name, attr_value in sorted(attrs.items()):
            self._write(' %s=%s' % (attr_name, quoteattr(attr_value)))
        self._write(six.u('>'))


def iri_to_uri(iri):
    """
    Convert an Internationalized Resource Identifier (IRI) portion into a URI
    portion that can be embedded in a URL.

    Follows the algorithm of RFC 3987, section 3.1, simplified on the
    assumption that the input is already UTF-8 or unicode.

    ``None`` is handed back unchanged; any other value is passed through
    ``safe_str`` and then percent-quoted. Intended to yield an ASCII string
    holding the encoded result.
    """
    if iri is None:
        return iri

    # Characters left unquoted: the "reserved" and "unreserved" sets from
    # sections 2.2 and 2.3 of RFC 3986:
    #     reserved    = gen-delims / sub-delims
    #     gen-delims  = ":" / "/" / "?" / "#" / "[" / "]" / "@"
    #     sub-delims  = "!" / "$" / "&" / "'" / "(" / ")"
    #                   / "*" / "+" / "," / ";" / "="
    #     unreserved  = ALPHA / DIGIT / "-" / "." / "_" / "~"
    # urllib.quote already treats every unreserved character except "~" as
    # safe, so only "~" has to be listed from that group.
    # "%" is listed as well because the end of section 3.1 of RFC 3987
    # explicitly says that % must not be converted.
    safe_chars = b"/#%[]=:;$&()+,!?*@'~"
    return quote(safe_str(iri), safe=safe_chars)


def force_text(text, strings_only=False):
    """
    Return ``text`` passed through ``safe_unicode``.

    ``strings_only`` is accepted but is not consulted by this implementation.
    """
    return safe_unicode(text)