Fix UnicodeDecodeError when Postgres is not configured in English (#4253)
This is a bit of a half-assed effort at fixing https://github.com/matrix-org/synapse/issues/4252. Fundamentally the right answer is to drop support for Python 2.
1 files changed, 38 insertions, 1 deletions
diff --git a/synapse/util/stringutils.py b/synapse/util/stringutils.py
index 6f318c6a29..fdcb375f95 100644
--- a/synapse/util/stringutils.py
+++ b/synapse/util/stringutils.py
@@ -16,7 +16,8 @@
import random
import string
-from six import PY3
+import six
+from six import PY2, PY3
from six.moves import range
_string_with_symbols = (
@@ -71,3 +72,39 @@ def to_ascii(s):
return s.encode("ascii")
except UnicodeEncodeError:
return s
+
+
+def exception_to_unicode(e):
+    """Helper function to extract the text of an exception as a unicode string
+
+    Args:
+        e (Exception): exception to be stringified
+
+    Returns:
+        unicode: the exception text; always a text string, never bytes
+    """
+    # urgh, this is a mess. The basic problem here is that psycopg2 constructs its
+    # exceptions with PyErr_SetString, with a (possibly non-ascii) argument. str() will
+    # then produce the raw byte sequence. Under Python 2, this will then cause another
+    # error if it gets mixed with a `unicode` object, as per
+    # https://github.com/matrix-org/synapse/issues/4252
+
+    # First of all, if we're under python3, everything is fine because it will sort this
+    # nonsense out for us.
+    if not PY2:
+        return str(e)
+
+    # otherwise let's have a stab at decoding the exception message. We'll circumvent
+    # Exception.__str__(), which would explode if someone raised Exception(u'non-ascii')
+    # and instead look at what is in the args member.
+
+    if not e.args:
+        return u""
+    elif len(e.args) > 1:
+        return six.text_type(repr(e.args))
+
+    msg = e.args[0]
+    if isinstance(msg, bytes):
+        return msg.decode('utf-8', errors='replace')
+    # a lone non-string argument (e.g. an int or None) must still come back as unicode
+    return six.text_type(msg)
|