summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r--changelog.d/4253.bugfix1
-rw-r--r--synapse/storage/_base.py15
-rw-r--r--synapse/util/stringutils.py39
3 files changed, 47 insertions, 8 deletions
diff --git a/changelog.d/4253.bugfix b/changelog.d/4253.bugfix
new file mode 100644
index 0000000000..1796e95b86
--- /dev/null
+++ b/changelog.d/4253.bugfix
@@ -0,0 +1 @@
+Fix UnicodeDecodeError when postgres is configured to give non-English errors
diff --git a/synapse/storage/_base.py b/synapse/storage/_base.py
index d9d0255d0b..38e7d26365 100644
--- a/synapse/storage/_base.py
+++ b/synapse/storage/_base.py
@@ -29,6 +29,7 @@ from synapse.api.errors import StoreError
 from synapse.storage.engines import PostgresEngine
 from synapse.util.caches.descriptors import Cache
 from synapse.util.logcontext import LoggingContext, PreserveLoggingContext
+from synapse.util.stringutils import exception_to_unicode
 
 logger = logging.getLogger(__name__)
 
@@ -249,32 +250,32 @@ class SQLBaseStore(object):
                 except self.database_engine.module.OperationalError as e:
                     # This can happen if the database disappears mid
                     # transaction.
-                    logger.warn(
+                    logger.warning(
                         "[TXN OPERROR] {%s} %s %d/%d",
-                        name, e, i, N
+                        name, exception_to_unicode(e), i, N
                     )
                     if i < N:
                         i += 1
                         try:
                             conn.rollback()
                         except self.database_engine.module.Error as e1:
-                            logger.warn(
+                            logger.warning(
                                 "[TXN EROLL] {%s} %s",
-                                name, e1,
+                                name, exception_to_unicode(e1),
                             )
                         continue
                     raise
                 except self.database_engine.module.DatabaseError as e:
                     if self.database_engine.is_deadlock(e):
-                        logger.warn("[TXN DEADLOCK] {%s} %d/%d", name, i, N)
+                        logger.warning("[TXN DEADLOCK] {%s} %d/%d", name, i, N)
                         if i < N:
                             i += 1
                             try:
                                 conn.rollback()
                             except self.database_engine.module.Error as e1:
-                                logger.warn(
+                                logger.warning(
                                     "[TXN EROLL] {%s} %s",
-                                    name, e1,
+                                    name, exception_to_unicode(e1),
                                 )
                             continue
                     raise
diff --git a/synapse/util/stringutils.py b/synapse/util/stringutils.py
index 6f318c6a29..fdcb375f95 100644
--- a/synapse/util/stringutils.py
+++ b/synapse/util/stringutils.py
@@ -16,7 +16,8 @@
 import random
 import string
 
-from six import PY3
+import six
+from six import PY2, PY3
 from six.moves import range
 
 _string_with_symbols = (
@@ -71,3 +72,39 @@ def to_ascii(s):
         return s.encode("ascii")
     except UnicodeEncodeError:
         return s
+
+
+def exception_to_unicode(e):
+    """Extract the text of an exception as a unicode string.
+
+    Args:
+        e (Exception): exception to be stringified
+
+    Returns:
+        unicode: the message; repr() of the args when it is not a single string
+    """
+    # psycopg2 constructs its exceptions with PyErr_SetString, with a (possibly
+    # non-ascii) argument, so str() produces the raw byte sequence. Under Python 2
+    # that causes another error once it gets mixed with a `unicode` object, as per
+    # https://github.com/matrix-org/synapse/issues/4252
+
+    # Under python3 str() already returns text, so there is nothing to work around.
+    if not PY2:
+        return str(e)
+
+    # Otherwise decode the message ourselves. We circumvent Exception.__str__(),
+    # which would explode if someone raised Exception(u'non-ascii'), and instead
+    # look at what is in the args member.
+    if len(e.args) == 0:
+        return u""
+    elif len(e.args) > 1:
+        return six.text_type(repr(e.args))
+
+    msg = e.args[0]
+    if isinstance(msg, bytes):
+        return msg.decode('utf-8', errors='replace')
+    elif isinstance(msg, six.text_type):
+        return msg
+
+    # a lone non-string argument (e.g. Exception(42)): honour the unicode contract
+    return six.text_type(repr(msg))