summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r--changelog.d/8632.bugfix1
-rw-r--r--synapse/api/auth.py4
-rw-r--r--synapse/handlers/auth.py4
-rw-r--r--synapse/handlers/cas_handler.py4
-rw-r--r--synapse/handlers/oidc_handler.py4
-rw-r--r--synapse/handlers/saml_handler.py4
-rw-r--r--synapse/http/site.py16
-rw-r--r--tests/handlers/test_oidc.py24
8 files changed, 33 insertions, 28 deletions
diff --git a/changelog.d/8632.bugfix b/changelog.d/8632.bugfix
new file mode 100644
index 0000000000..7d834aa2e2
--- /dev/null
+++ b/changelog.d/8632.bugfix
@@ -0,0 +1 @@
+Fix handling of User-Agent headers that are invalid UTF-8, which caused user agents of users to not get correctly recorded.
diff --git a/synapse/api/auth.py b/synapse/api/auth.py
index bff87fabde..526cb58c5f 100644
--- a/synapse/api/auth.py
+++ b/synapse/api/auth.py
@@ -184,9 +184,7 @@ class Auth:
         """
         try:
             ip_addr = self.hs.get_ip_from_request(request)
-            user_agent = request.requestHeaders.getRawHeaders(
-                b"User-Agent", default=[b""]
-            )[0].decode("ascii", "surrogateescape")
+            user_agent = request.get_user_agent("")
 
             access_token = self.get_access_token_from_request(request)
 
diff --git a/synapse/handlers/auth.py b/synapse/handlers/auth.py
index 8619fbb982..48d60feaab 100644
--- a/synapse/handlers/auth.py
+++ b/synapse/handlers/auth.py
@@ -470,9 +470,7 @@ class AuthHandler(BaseHandler):
             # authentication flow.
             await self.store.set_ui_auth_clientdict(sid, clientdict)
 
-        user_agent = request.requestHeaders.getRawHeaders(b"User-Agent", default=[b""])[
-            0
-        ].decode("ascii", "surrogateescape")
+        user_agent = request.get_user_agent("")
 
         await self.store.add_user_agent_ip_to_ui_auth_session(
             session.session_id, user_agent, clientip
diff --git a/synapse/handlers/cas_handler.py b/synapse/handlers/cas_handler.py
index a4cc4b9a5a..048a3b3c0b 100644
--- a/synapse/handlers/cas_handler.py
+++ b/synapse/handlers/cas_handler.py
@@ -212,9 +212,7 @@ class CasHandler:
         else:
             if not registered_user_id:
                 # Pull out the user-agent and IP from the request.
-                user_agent = request.requestHeaders.getRawHeaders(
-                    b"User-Agent", default=[b""]
-                )[0].decode("ascii", "surrogateescape")
+                user_agent = request.get_user_agent("")
                 ip_address = self.hs.get_ip_from_request(request)
 
                 registered_user_id = await self._registration_handler.register_user(
diff --git a/synapse/handlers/oidc_handler.py b/synapse/handlers/oidc_handler.py
index 05ac86e697..a312610635 100644
--- a/synapse/handlers/oidc_handler.py
+++ b/synapse/handlers/oidc_handler.py
@@ -695,9 +695,7 @@ class OidcHandler:
                 return
 
         # Pull out the user-agent and IP from the request.
-        user_agent = request.requestHeaders.getRawHeaders(b"User-Agent", default=[b""])[
-            0
-        ].decode("ascii", "surrogateescape")
+        user_agent = request.get_user_agent("")
         ip_address = self.hs.get_ip_from_request(request)
 
         # Call the mapper to register/login the user
diff --git a/synapse/handlers/saml_handler.py b/synapse/handlers/saml_handler.py
index 285c481a96..fd6c5e9ea8 100644
--- a/synapse/handlers/saml_handler.py
+++ b/synapse/handlers/saml_handler.py
@@ -216,9 +216,7 @@ class SamlHandler:
                 return
 
         # Pull out the user-agent and IP from the request.
-        user_agent = request.requestHeaders.getRawHeaders(b"User-Agent", default=[b""])[
-            0
-        ].decode("ascii", "surrogateescape")
+        user_agent = request.get_user_agent("")
         ip_address = self.hs.get_ip_from_request(request)
 
         # Call the mapper to register/login the user
diff --git a/synapse/http/site.py b/synapse/http/site.py
index 6e79b47828..ca673028e4 100644
--- a/synapse/http/site.py
+++ b/synapse/http/site.py
@@ -109,8 +109,14 @@ class SynapseRequest(Request):
             method = self.method.decode("ascii")
         return method
 
-    def get_user_agent(self):
-        return self.requestHeaders.getRawHeaders(b"User-Agent", [None])[-1]
+    def get_user_agent(self, default: str) -> str:
+        """Return the last User-Agent header, or the given default.
+        """
+        user_agent = self.requestHeaders.getRawHeaders(b"User-Agent", [None])[-1]
+        if user_agent is None:
+            return default
+
+        return user_agent.decode("ascii", "replace")
 
     def render(self, resrc):
         # this is called once a Resource has been found to serve the request; in our
@@ -274,11 +280,7 @@ class SynapseRequest(Request):
         # with maximum recursion trying to log errors about
         # the charset problem.
         # c.f. https://github.com/matrix-org/synapse/issues/3471
-        user_agent = self.get_user_agent()
-        if user_agent is not None:
-            user_agent = user_agent.decode("utf-8", "replace")
-        else:
-            user_agent = "-"
+        user_agent = self.get_user_agent("-")
 
         code = str(self.code)
         if not self.finished:
diff --git a/tests/handlers/test_oidc.py b/tests/handlers/test_oidc.py
index b6f436c016..0d51705849 100644
--- a/tests/handlers/test_oidc.py
+++ b/tests/handlers/test_oidc.py
@@ -394,7 +394,14 @@ class OidcHandlerTestCase(HomeserverTestCase):
         self.handler._map_userinfo_to_user = simple_async_mock(return_value=user_id)
         self.handler._auth_handler.complete_sso_login = simple_async_mock()
         request = Mock(
-            spec=["args", "getCookie", "addCookie", "requestHeaders", "getClientIP"]
+            spec=[
+                "args",
+                "getCookie",
+                "addCookie",
+                "requestHeaders",
+                "getClientIP",
+                "get_user_agent",
+            ]
         )
 
         code = "code"
@@ -414,9 +421,8 @@ class OidcHandlerTestCase(HomeserverTestCase):
         request.args[b"code"] = [code.encode("utf-8")]
         request.args[b"state"] = [state.encode("utf-8")]
 
-        request.requestHeaders = Mock(spec=["getRawHeaders"])
-        request.requestHeaders.getRawHeaders.return_value = [user_agent.encode("ascii")]
         request.getClientIP.return_value = ip_address
+        request.get_user_agent.return_value = user_agent
 
         self.get_success(self.handler.handle_oidc_callback(request))
 
@@ -621,7 +627,14 @@ class OidcHandlerTestCase(HomeserverTestCase):
         self.handler._map_userinfo_to_user = simple_async_mock(return_value=user_id)
         self.handler._auth_handler.complete_sso_login = simple_async_mock()
         request = Mock(
-            spec=["args", "getCookie", "addCookie", "requestHeaders", "getClientIP"]
+            spec=[
+                "args",
+                "getCookie",
+                "addCookie",
+                "requestHeaders",
+                "getClientIP",
+                "get_user_agent",
+            ]
         )
 
         state = "state"
@@ -637,9 +650,8 @@ class OidcHandlerTestCase(HomeserverTestCase):
         request.args[b"code"] = [b"code"]
         request.args[b"state"] = [state.encode("utf-8")]
 
-        request.requestHeaders = Mock(spec=["getRawHeaders"])
-        request.requestHeaders.getRawHeaders.return_value = [b"Browser"]
         request.getClientIP.return_value = "10.0.0.1"
+        request.get_user_agent.return_value = "Browser"
 
         self.get_success(self.handler.handle_oidc_callback(request))