summary refs log tree commit diff
diff options
context:
space:
mode:
author Richard van der Hoff <1389908+richvdh@users.noreply.github.com> 2018-07-02 15:25:36 +0100
committer GitHub <noreply@github.com> 2018-07-02 15:25:36 +0100
commit f741630847352dd2cd4c85b54d9343738bf77165 (patch)
tree 5ff04933bd20139ea8da68d9d1c7976141bdf502
parent Merge pull request #3467 from matrix-org/hawkowl/contributor-requirements (diff)
parent news snippet (diff)
download synapse-f741630847352dd2cd4c85b54d9343738bf77165.tar.xz
Merge pull request #3470 from matrix-org/matthew/fix-utf8-logging
don't mix unicode strings with utf8-in-byte-strings
-rw-r--r-- changelog.d/3470.bugfix 1
-rw-r--r-- synapse/http/site.py 19
2 files changed, 18 insertions, 2 deletions
diff --git a/changelog.d/3470.bugfix b/changelog.d/3470.bugfix
new file mode 100644
index 0000000000..1308931191
--- /dev/null
+++ b/changelog.d/3470.bugfix
@@ -0,0 +1 @@
+Fix bug where synapse would explode when receiving unicode in HTTP User-Agent header
diff --git a/synapse/http/site.py b/synapse/http/site.py
index 74a752d6cf..fe93643b1e 100644
--- a/synapse/http/site.py
+++ b/synapse/http/site.py
@@ -107,13 +107,28 @@ class SynapseRequest(Request):
 
         end_time = time.time()
 
+        # need to decode as it could be raw utf-8 bytes
+        # from a IDN servname in an auth header
+        authenticated_entity = self.authenticated_entity
+        if authenticated_entity is not None:
+            authenticated_entity = authenticated_entity.decode("utf-8", "replace")
+
+        # ...or could be raw utf-8 bytes in the User-Agent header.
+        # N.B. if you don't do this, the logger explodes cryptically
+        # with maximum recursion trying to log errors about
+        # the charset problem.
+        # c.f. https://github.com/matrix-org/synapse/issues/3471
+        user_agent = self.get_user_agent()
+        if user_agent is not None:
+            user_agent = user_agent.decode("utf-8", "replace")
+
         self.site.access_logger.info(
             "%s - %s - {%s}"
             " Processed request: %.3fsec (%.3fsec, %.3fsec) (%.3fsec/%.3fsec/%d)"
             " %sB %s \"%s %s %s\" \"%s\" [%d dbevts]",
             self.getClientIP(),
             self.site.site_tag,
-            self.authenticated_entity,
+            authenticated_entity,
             end_time - self.start_time,
             ru_utime,
             ru_stime,
@@ -125,7 +140,7 @@ class SynapseRequest(Request):
             self.method,
             self.get_redacted_uri(),
             self.clientproto,
-            self.get_user_agent(),
+            user_agent,
             evt_db_fetch_count,
         )