summary refs log tree commit diff
path: root/synapse/util/stringutils.py
diff options
context:
space:
mode:
Diffstat (limited to 'synapse/util/stringutils.py')
-rw-r--r--synapse/util/stringutils.py52
1 files changed, 52 insertions, 0 deletions
diff --git a/synapse/util/stringutils.py b/synapse/util/stringutils.py

index 43d9db67ec..fdcb375f95 100644 --- a/synapse/util/stringutils.py +++ b/synapse/util/stringutils.py
@@ -16,6 +16,8 @@ import random import string +import six +from six import PY2, PY3 from six.moves import range _string_with_symbols = ( @@ -34,6 +36,17 @@ def random_string_with_symbols(length): def is_ascii(s): + + if PY3: + if isinstance(s, bytes): + try: + s.decode('ascii').encode('ascii') + except UnicodeDecodeError: + return False + except UnicodeEncodeError: + return False + return True + try: s.encode("ascii") except UnicodeEncodeError: @@ -49,6 +62,9 @@ def to_ascii(s): If given None then will return None. """ + if PY3: + return s + if s is None: return None @@ -56,3 +72,39 @@ def to_ascii(s): return s.encode("ascii") except UnicodeEncodeError: return s + + +def exception_to_unicode(e): + """Helper function to extract the text of an exception as a unicode string + + Args: + e (Exception): exception to be stringified + + Returns: + unicode + """ + # urgh, this is a mess. The basic problem here is that psycopg2 constructs its + # exceptions with PyErr_SetString, with a (possibly non-ascii) argument. str() will + # then produce the raw byte sequence. Under Python 2, this will then cause another + # error if it gets mixed with a `unicode` object, as per + # https://github.com/matrix-org/synapse/issues/4252 + + # First of all, if we're under python3, everything is fine because it will sort this + # nonsense out for us. + if not PY2: + return str(e) + + # otherwise let's have a stab at decoding the exception message. We'll circumvent + # Exception.__str__(), which would explode if someone raised Exception(u'non-ascii') + # and instead look at what is in the args member. + + if len(e.args) == 0: + return u"" + elif len(e.args) > 1: + return six.text_type(repr(e.args)) + + msg = e.args[0] + if isinstance(msg, bytes): + return msg.decode('utf-8', errors='replace') + else: + return msg