From 3a245f6cfe3f35f5a37bcd91f3242ef59dc71332 Mon Sep 17 00:00:00 2001 From: reivilibre Date: Fri, 19 Aug 2022 11:03:29 +0000 Subject: Fix validation problem that occurs when a user tries to deactivate their account or change their password. (#13563) --- synapse/rest/client/account.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'synapse') diff --git a/synapse/rest/client/account.py b/synapse/rest/client/account.py index e5ee63133b..9041e29d6c 100644 --- a/synapse/rest/client/account.py +++ b/synapse/rest/client/account.py @@ -196,7 +196,7 @@ class PasswordRestServlet(RestServlet): params, session_id = await self.auth_handler.validate_user_via_ui_auth( requester, request, - body.dict(), + body.dict(exclude_unset=True), "modify your account password", ) except InteractiveAuthIncompleteError as e: @@ -219,7 +219,7 @@ class PasswordRestServlet(RestServlet): result, params, session_id = await self.auth_handler.check_ui_auth( [[LoginType.EMAIL_IDENTITY]], request, - body.dict(), + body.dict(exclude_unset=True), "modify your account password", ) except InteractiveAuthIncompleteError as e: @@ -316,7 +316,7 @@ class DeactivateAccountRestServlet(RestServlet): await self.auth_handler.validate_user_via_ui_auth( requester, request, - body.dict(), + body.dict(exclude_unset=True), "deactivate your account", ) result = await self._deactivate_account_handler.deactivate_account( -- cgit 1.5.1 From f9f03426de338ae1879e174f63adf698bbfc3a4b Mon Sep 17 00:00:00 2001 From: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com> Date: Fri, 19 Aug 2022 17:17:10 +0100 Subject: Implement MSC3852: Expose `last_seen_user_agent` to users for their own devices; also expose to Admin API (#13549) --- changelog.d/13549.feature | 1 + changelog.d/13549.misc | 1 + docs/admin_api/user_admin_api.md | 7 +++ synapse/config/experimental.py | 3 ++ synapse/handlers/device.py | 9 +++- synapse/rest/client/devices.py | 27 ++++++++++++ tests/rest/admin/test_user.py | 92 +++++++++++++++++++++++++++++++++++++++- tests/unittest.py | 15 +++++++ 8 files changed, 153 insertions(+), 2 deletions(-) create mode 100644 changelog.d/13549.feature create mode 100644 changelog.d/13549.misc (limited to 'synapse') diff --git a/changelog.d/13549.feature b/changelog.d/13549.feature new file mode 100644 index 0000000000..b6a726789c --- /dev/null +++ b/changelog.d/13549.feature @@ -0,0 +1 @@ +Add an experimental implementation for [MSC3852](https://github.com/matrix-org/matrix-spec-proposals/pull/3852). \ No newline at end of file diff --git a/changelog.d/13549.misc b/changelog.d/13549.misc new file mode 100644 index 0000000000..5b4303e87e --- /dev/null +++ b/changelog.d/13549.misc @@ -0,0 +1 @@ +Allow specifying additional request fields when using the `HomeServerTestCase.login` helper method. 
\ No newline at end of file diff --git a/docs/admin_api/user_admin_api.md b/docs/admin_api/user_admin_api.md index 0871cfebf5..c1ca0c8a64 100644 --- a/docs/admin_api/user_admin_api.md +++ b/docs/admin_api/user_admin_api.md @@ -753,6 +753,7 @@ A response body like the following is returned: "device_id": "QBUAZIFURK", "display_name": "android", "last_seen_ip": "1.2.3.4", + "last_seen_user_agent": "Mozilla/5.0 (X11; Linux x86_64; rv:103.0) Gecko/20100101 Firefox/103.0", "last_seen_ts": 1474491775024, "user_id": "" }, @@ -760,6 +761,7 @@ A response body like the following is returned: "device_id": "AUIECTSRND", "display_name": "ios", "last_seen_ip": "1.2.3.5", + "last_seen_user_agent": "Mozilla/5.0 (X11; Linux x86_64; rv:103.0) Gecko/20100101 Firefox/103.0", "last_seen_ts": 1474491775025, "user_id": "" } @@ -786,6 +788,8 @@ The following fields are returned in the JSON response body: Absent if no name has been set. - `last_seen_ip` - The IP address where this device was last seen. (May be a few minutes out of date, for efficiency reasons). + - `last_seen_user_agent` - The user agent of the device when it was last seen. + (May be a few minutes out of date, for efficiency reasons). - `last_seen_ts` - The timestamp (in milliseconds since the unix epoch) when this devices was last seen. (May be a few minutes out of date, for efficiency reasons). - `user_id` - Owner of device. @@ -837,6 +841,7 @@ A response body like the following is returned: "device_id": "", "display_name": "android", "last_seen_ip": "1.2.3.4", + "last_seen_user_agent": "Mozilla/5.0 (X11; Linux x86_64; rv:103.0) Gecko/20100101 Firefox/103.0", "last_seen_ts": 1474491775024, "user_id": "" } @@ -858,6 +863,8 @@ The following fields are returned in the JSON response body: Absent if no name has been set. - `last_seen_ip` - The IP address where this device was last seen. (May be a few minutes out of date, for efficiency reasons). + - `last_seen_user_agent` - The user agent of the device when it was last seen. + (May be a few minutes out of date, for efficiency reasons). - `last_seen_ts` - The timestamp (in milliseconds since the unix epoch) when this devices was last seen. (May be a few minutes out of date, for efficiency reasons). - `user_id` - Owner of device. diff --git a/synapse/config/experimental.py b/synapse/config/experimental.py index 7d17c958bb..c1ff417539 100644 --- a/synapse/config/experimental.py +++ b/synapse/config/experimental.py @@ -90,3 +90,6 @@ class ExperimentalConfig(Config): # MSC3848: Introduce errcodes for specific event sending failures self.msc3848_enabled: bool = experimental.get("msc3848_enabled", False) + + # MSC3852: Expose last seen user agent field on /_matrix/client/v3/devices. 
+ self.msc3852_enabled: bool = experimental.get("msc3852_enabled", False) diff --git a/synapse/handlers/device.py b/synapse/handlers/device.py index 1a8379854c..f5c586f657 100644 --- a/synapse/handlers/device.py +++ b/synapse/handlers/device.py @@ -74,6 +74,7 @@ class DeviceWorkerHandler: self._state_storage = hs.get_storage_controllers().state self._auth_handler = hs.get_auth_handler() self.server_name = hs.hostname + self._msc3852_enabled = hs.config.experimental.msc3852_enabled @trace async def get_devices_by_user(self, user_id: str) -> List[JsonDict]: @@ -747,7 +748,13 @@ def _update_device_from_client_ips( device: JsonDict, client_ips: Mapping[Tuple[str, str], Mapping[str, Any]] ) -> None: ip = client_ips.get((device["user_id"], device["device_id"]), {}) - device.update({"last_seen_ts": ip.get("last_seen"), "last_seen_ip": ip.get("ip")}) + device.update( + { + "last_seen_user_agent": ip.get("user_agent"), + "last_seen_ts": ip.get("last_seen"), + "last_seen_ip": ip.get("ip"), + } + ) class DeviceListUpdater: diff --git a/synapse/rest/client/devices.py b/synapse/rest/client/devices.py index 6fab102437..ed6ce78d47 100644 --- a/synapse/rest/client/devices.py +++ b/synapse/rest/client/devices.py @@ -42,12 +42,26 @@ class DevicesRestServlet(RestServlet): self.hs = hs self.auth = hs.get_auth() self.device_handler = hs.get_device_handler() + self._msc3852_enabled = hs.config.experimental.msc3852_enabled async def on_GET(self, request: SynapseRequest) -> Tuple[int, JsonDict]: requester = await self.auth.get_user_by_req(request, allow_guest=True) devices = await self.device_handler.get_devices_by_user( requester.user.to_string() ) + + # If MSC3852 is disabled, then the "last_seen_user_agent" field will be + # removed from each device. If it is enabled, then the field name will + # be replaced by the unstable identifier. + # + # When MSC3852 is accepted, this block of code can just be removed to + # expose "last_seen_user_agent" to clients. + for device in devices: + last_seen_user_agent = device["last_seen_user_agent"] + del device["last_seen_user_agent"] + if self._msc3852_enabled: + device["org.matrix.msc3852.last_seen_user_agent"] = last_seen_user_agent + return 200, {"devices": devices} @@ -108,6 +122,7 @@ class DeviceRestServlet(RestServlet): self.auth = hs.get_auth() self.device_handler = hs.get_device_handler() self.auth_handler = hs.get_auth_handler() + self._msc3852_enabled = hs.config.experimental.msc3852_enabled async def on_GET( self, request: SynapseRequest, device_id: str @@ -118,6 +133,18 @@ class DeviceRestServlet(RestServlet): ) if device is None: raise NotFoundError("No device found") + + # If MSC3852 is disabled, then the "last_seen_user_agent" field will be + # removed from each device. If it is enabled, then the field name will + # be replaced by the unstable identifier. + # + # When MSC3852 is accepted, this block of code can just be removed to + # expose "last_seen_user_agent" to clients. + last_seen_user_agent = device["last_seen_user_agent"] + del device["last_seen_user_agent"] + if self._msc3852_enabled: + device["org.matrix.msc3852.last_seen_user_agent"] = last_seen_user_agent + return 200, device @interactive_auth_handler diff --git a/tests/rest/admin/test_user.py b/tests/rest/admin/test_user.py index 411e4ec005..1afd082707 100644 --- a/tests/rest/admin/test_user.py +++ b/tests/rest/admin/test_user.py @@ -1,4 +1,4 @@ -# Copyright 2018-2021 The Matrix.org Foundation C.I.C. +# Copyright 2018-2022 The Matrix.org Foundation C.I.C. 
# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -904,6 +904,96 @@ class UsersListTestCase(unittest.HomeserverTestCase): ) +class UserDevicesTestCase(unittest.HomeserverTestCase): + """ + Tests user device management-related Admin APIs. + """ + + servlets = [ + synapse.rest.admin.register_servlets, + login.register_servlets, + sync.register_servlets, + ] + + def prepare( + self, reactor: MemoryReactor, clock: Clock, homeserver: HomeServer + ) -> None: + # Set up an Admin user to query the Admin API with. + self.admin_user_id = self.register_user("admin", "pass", admin=True) + self.admin_user_token = self.login("admin", "pass") + + # Set up a test user to query the devices of. + self.other_user_device_id = "TESTDEVICEID" + self.other_user_device_display_name = "My Test Device" + self.other_user_client_ip = "1.2.3.4" + self.other_user_user_agent = "EquestriaTechnology/123.0" + + self.other_user_id = self.register_user("user", "pass", displayname="User1") + self.other_user_token = self.login( + "user", + "pass", + device_id=self.other_user_device_id, + additional_request_fields={ + "initial_device_display_name": self.other_user_device_display_name, + }, + ) + + # Have the "other user" make a request so that the "last_seen_*" fields are + # populated in the tests below. + channel = self.make_request( + "GET", + "/_matrix/client/v3/sync", + access_token=self.other_user_token, + client_ip=self.other_user_client_ip, + custom_headers=[ + ("User-Agent", self.other_user_user_agent), + ], + ) + self.assertEqual(200, channel.code, msg=channel.json_body) + + def test_list_user_devices(self) -> None: + """ + Tests that a user's devices and attributes are listed correctly via the Admin API. + """ + # Request all devices of "other user" + channel = self.make_request( + "GET", + f"/_synapse/admin/v2/users/{self.other_user_id}/devices", + access_token=self.admin_user_token, + ) + self.assertEqual(200, channel.code, msg=channel.json_body) + + # Double-check we got the single device expected + user_devices = channel.json_body["devices"] + self.assertEqual(len(user_devices), 1) + self.assertEqual(channel.json_body["total"], 1) + + # Check that all the attributes of the device reported are as expected. + self._validate_attributes_of_device_response(user_devices[0]) + + # Request just a single device for "other user" by its ID + channel = self.make_request( + "GET", + f"/_synapse/admin/v2/users/{self.other_user_id}/devices/" + f"{self.other_user_device_id}", + access_token=self.admin_user_token, + ) + self.assertEqual(200, channel.code, msg=channel.json_body) + + # Check that all the attributes of the device reported are as expected. 
+ self._validate_attributes_of_device_response(channel.json_body) + + def _validate_attributes_of_device_response(self, response: JsonDict) -> None: + # Check that all device expected attributes are present + self.assertEqual(response["user_id"], self.other_user_id) + self.assertEqual(response["device_id"], self.other_user_device_id) + self.assertEqual(response["display_name"], self.other_user_device_display_name) + self.assertEqual(response["last_seen_ip"], self.other_user_client_ip) + self.assertEqual(response["last_seen_user_agent"], self.other_user_user_agent) + self.assertIsInstance(response["last_seen_ts"], int) + self.assertGreater(response["last_seen_ts"], 0) + + class DeactivateAccountTestCase(unittest.HomeserverTestCase): servlets = [ diff --git a/tests/unittest.py b/tests/unittest.py index bec4a3d023..975b0a23a7 100644 --- a/tests/unittest.py +++ b/tests/unittest.py @@ -677,14 +677,29 @@ class HomeserverTestCase(TestCase): username: str, password: str, device_id: Optional[str] = None, + additional_request_fields: Optional[Dict[str, str]] = None, custom_headers: Optional[Iterable[CustomHeaderType]] = None, ) -> str: """ Log in a user, and get an access token. Requires the Login API be registered. + + Args: + username: The localpart to assign to the new user. + password: The password to assign to the new user. + device_id: An optional device ID to assign to the new device created during + login. + additional_request_fields: A dictionary containing any additional /login + request fields and their values. + custom_headers: Custom HTTP headers and values to add to the /login request. + + Returns: + The newly registered user's Matrix ID. """ body = {"type": "m.login.password", "user": username, "password": password} if device_id: body["device_id"] = device_id + if additional_request_fields: + body.update(additional_request_fields) channel = self.make_request( "POST", -- cgit 1.5.1 From 06df5d4250f54d5a95b0c90bfc9352ec6f02c520 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Fri, 19 Aug 2022 15:37:01 -0500 Subject: MSC2716v4 room version - remove namespace from MSC2716 event content fields (#13551) Complement PR: https://github.com/matrix-org/complement/pull/450 As suggested in https://github.com/matrix-org/matrix-spec-proposals/pull/2716#discussion_r941444525 --- changelog.d/13551.feature | 1 + synapse/api/constants.py | 6 +++--- synapse/api/room_versions.py | 38 ++++++++++++++++++------------------ synapse/events/utils.py | 2 +- synapse/handlers/federation_event.py | 2 +- 5 files changed, 25 insertions(+), 24 deletions(-) create mode 100644 changelog.d/13551.feature (limited to 'synapse') diff --git a/changelog.d/13551.feature b/changelog.d/13551.feature new file mode 100644 index 0000000000..365673a3c1 --- /dev/null +++ b/changelog.d/13551.feature @@ -0,0 +1 @@ +Add `org.matrix.msc2716v4` experimental room version with updated content fields. 
diff --git a/synapse/api/constants.py b/synapse/api/constants.py index 1d46fb0e43..c73aea622a 100644 --- a/synapse/api/constants.py +++ b/synapse/api/constants.py @@ -216,11 +216,11 @@ class EventContentFields: MSC2716_HISTORICAL: Final = "org.matrix.msc2716.historical" # For "insertion" events to indicate what the next batch ID should be in # order to connect to it - MSC2716_NEXT_BATCH_ID: Final = "org.matrix.msc2716.next_batch_id" + MSC2716_NEXT_BATCH_ID: Final = "next_batch_id" # Used on "batch" events to indicate which insertion event it connects to - MSC2716_BATCH_ID: Final = "org.matrix.msc2716.batch_id" + MSC2716_BATCH_ID: Final = "batch_id" # For "marker" events - MSC2716_MARKER_INSERTION: Final = "org.matrix.msc2716.marker.insertion" + MSC2716_INSERTION_EVENT_REFERENCE: Final = "insertion_event_reference" # The authorising user for joining a restricted room. AUTHORISING_USER: Final = "join_authorised_via_users_server" diff --git a/synapse/api/room_versions.py b/synapse/api/room_versions.py index 00e81b3afc..a0e4ab6db6 100644 --- a/synapse/api/room_versions.py +++ b/synapse/api/room_versions.py @@ -269,24 +269,6 @@ class RoomVersions: msc3787_knock_restricted_join_rule=False, msc3667_int_only_power_levels=False, ) - MSC2716v3 = RoomVersion( - "org.matrix.msc2716v3", - RoomDisposition.UNSTABLE, - EventFormatVersions.V3, - StateResolutionVersions.V2, - enforce_key_validity=True, - special_case_aliases_auth=False, - strict_canonicaljson=True, - limit_notifications_power_levels=True, - msc2176_redaction_rules=False, - msc3083_join_rules=False, - msc3375_redaction_rules=False, - msc2403_knocking=True, - msc2716_historical=True, - msc2716_redactions=True, - msc3787_knock_restricted_join_rule=False, - msc3667_int_only_power_levels=False, - ) MSC3787 = RoomVersion( "org.matrix.msc3787", RoomDisposition.UNSTABLE, @@ -323,6 +305,24 @@ class RoomVersions: msc3787_knock_restricted_join_rule=True, msc3667_int_only_power_levels=True, ) + MSC2716v4 = RoomVersion( + "org.matrix.msc2716v4", + RoomDisposition.UNSTABLE, + EventFormatVersions.V3, + StateResolutionVersions.V2, + enforce_key_validity=True, + special_case_aliases_auth=False, + strict_canonicaljson=True, + limit_notifications_power_levels=True, + msc2176_redaction_rules=False, + msc3083_join_rules=False, + msc3375_redaction_rules=False, + msc2403_knocking=True, + msc2716_historical=True, + msc2716_redactions=True, + msc3787_knock_restricted_join_rule=False, + msc3667_int_only_power_levels=False, + ) KNOWN_ROOM_VERSIONS: Dict[str, RoomVersion] = { @@ -338,9 +338,9 @@ KNOWN_ROOM_VERSIONS: Dict[str, RoomVersion] = { RoomVersions.V7, RoomVersions.V8, RoomVersions.V9, - RoomVersions.MSC2716v3, RoomVersions.MSC3787, RoomVersions.V10, + RoomVersions.MSC2716v4, ) } diff --git a/synapse/events/utils.py b/synapse/events/utils.py index ac91c5eb57..71853caad8 100644 --- a/synapse/events/utils.py +++ b/synapse/events/utils.py @@ -161,7 +161,7 @@ def prune_event_dict(room_version: RoomVersion, event_dict: JsonDict) -> JsonDic elif room_version.msc2716_redactions and event_type == EventTypes.MSC2716_BATCH: add_fields(EventContentFields.MSC2716_BATCH_ID) elif room_version.msc2716_redactions and event_type == EventTypes.MSC2716_MARKER: - add_fields(EventContentFields.MSC2716_MARKER_INSERTION) + add_fields(EventContentFields.MSC2716_INSERTION_EVENT_REFERENCE) allowed_fields = {k: v for k, v in event_dict.items() if k in allowed_keys} diff --git a/synapse/handlers/federation_event.py b/synapse/handlers/federation_event.py index f40b071a74..32326975a1 100644 
--- a/synapse/handlers/federation_event.py +++ b/synapse/handlers/federation_event.py @@ -1384,7 +1384,7 @@ class FederationEventHandler: logger.debug("_handle_marker_event: received %s", marker_event) insertion_event_id = marker_event.content.get( - EventContentFields.MSC2716_MARKER_INSERTION + EventContentFields.MSC2716_INSERTION_EVENT_REFERENCE ) if insertion_event_id is None: -- cgit 1.5.1 From 94375f7a913f75fe0a93a3eda2bfe5060e975290 Mon Sep 17 00:00:00 2001 From: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com> Date: Mon, 22 Aug 2022 10:03:11 +0100 Subject: Remove redundant opentracing spans for `/sendToDevice` and `/keys/upload` (#13574) --- changelog.d/13574.bugfix | 1 + synapse/rest/client/keys.py | 3 +-- synapse/rest/client/sendtodevice.py | 3 +-- 3 files changed, 3 insertions(+), 4 deletions(-) create mode 100644 changelog.d/13574.bugfix (limited to 'synapse') diff --git a/changelog.d/13574.bugfix b/changelog.d/13574.bugfix new file mode 100644 index 0000000000..3899c137aa --- /dev/null +++ b/changelog.d/13574.bugfix @@ -0,0 +1 @@ +Fix the `opentracing.force_tracing_for_users` config option not applying to [`/sendToDevice`](https://spec.matrix.org/v1.3/client-server-api/#put_matrixclientv3sendtodeviceeventtypetxnid) and [`/keys/upload`](https://spec.matrix.org/v1.3/client-server-api/#post_matrixclientv3keysupload) requests. \ No newline at end of file diff --git a/synapse/rest/client/keys.py b/synapse/rest/client/keys.py index e3f454896a..a395694fa5 100644 --- a/synapse/rest/client/keys.py +++ b/synapse/rest/client/keys.py @@ -26,7 +26,7 @@ from synapse.http.servlet import ( parse_string, ) from synapse.http.site import SynapseRequest -from synapse.logging.opentracing import log_kv, set_tag, trace_with_opname +from synapse.logging.opentracing import log_kv, set_tag from synapse.types import JsonDict, StreamToken from ._base import client_patterns, interactive_auth_handler @@ -71,7 +71,6 @@ class KeyUploadServlet(RestServlet): self.e2e_keys_handler = hs.get_e2e_keys_handler() self.device_handler = hs.get_device_handler() - @trace_with_opname("upload_keys") async def on_POST( self, request: SynapseRequest, device_id: Optional[str] ) -> Tuple[int, JsonDict]: diff --git a/synapse/rest/client/sendtodevice.py b/synapse/rest/client/sendtodevice.py index 1a8e9a96d4..46a8b03829 100644 --- a/synapse/rest/client/sendtodevice.py +++ b/synapse/rest/client/sendtodevice.py @@ -19,7 +19,7 @@ from synapse.http import servlet from synapse.http.server import HttpServer from synapse.http.servlet import assert_params_in_dict, parse_json_object_from_request from synapse.http.site import SynapseRequest -from synapse.logging.opentracing import set_tag, trace_with_opname +from synapse.logging.opentracing import set_tag from synapse.rest.client.transactions import HttpTransactionCache from synapse.types import JsonDict @@ -43,7 +43,6 @@ class SendToDeviceRestServlet(servlet.RestServlet): self.txns = HttpTransactionCache(hs) self.device_message_handler = hs.get_device_message_handler() - @trace_with_opname("sendToDevice") def on_PUT( self, request: SynapseRequest, message_type: str, txn_id: str ) -> Awaitable[Tuple[int, JsonDict]]: -- cgit 1.5.1 From 3dd175b628bab5638165f20de9eade36a4e88147 Mon Sep 17 00:00:00 2001 From: Quentin Gliech Date: Mon, 22 Aug 2022 15:17:59 +0200 Subject: `synapse.api.auth.Auth` cleanup: make permission-related methods use `Requester` instead of the `UserID` (#13024) Part of #13019 This changes all the permission-related methods to rely on the Requester instead of 
the UserID. This is a first step towards enabling scoped access tokens at some point, since I expect the Requester to have scope-related informations in it. It also changes methods which figure out the user/device/appservice out of the access token to return a Requester instead of something else. This avoids having store-related objects in the methods signatures. --- changelog.d/13024.misc | 1 + synapse/api/auth.py | 202 +++++++++++------------ synapse/handlers/auth.py | 17 +- synapse/handlers/directory.py | 24 ++- synapse/handlers/initial_sync.py | 6 +- synapse/handlers/message.py | 23 +-- synapse/handlers/pagination.py | 2 +- synapse/handlers/register.py | 15 +- synapse/handlers/relations.py | 2 +- synapse/handlers/room.py | 4 +- synapse/handlers/room_member.py | 10 +- synapse/handlers/typing.py | 10 +- synapse/http/site.py | 2 +- synapse/rest/admin/_base.py | 10 +- synapse/rest/admin/media.py | 6 +- synapse/rest/admin/rooms.py | 12 +- synapse/rest/admin/users.py | 15 +- synapse/rest/client/profile.py | 4 +- synapse/rest/client/register.py | 3 - synapse/rest/client/room.py | 13 +- synapse/server_notices/server_notices_manager.py | 2 +- synapse/storage/databases/main/registration.py | 2 +- tests/api/test_auth.py | 8 +- tests/handlers/test_typing.py | 8 +- tests/rest/client/test_retention.py | 4 +- tests/rest/client/test_shadow_banned.py | 6 +- 26 files changed, 203 insertions(+), 208 deletions(-) create mode 100644 changelog.d/13024.misc (limited to 'synapse') diff --git a/changelog.d/13024.misc b/changelog.d/13024.misc new file mode 100644 index 0000000000..aa43c82429 --- /dev/null +++ b/changelog.d/13024.misc @@ -0,0 +1 @@ +Refactor methods in `synapse.api.auth.Auth` to use `Requester` objects everywhere instead of user IDs. diff --git a/synapse/api/auth.py b/synapse/api/auth.py index 523bad0c55..9a1aea083f 100644 --- a/synapse/api/auth.py +++ b/synapse/api/auth.py @@ -37,8 +37,7 @@ from synapse.logging.opentracing import ( start_active_span, trace, ) -from synapse.storage.databases.main.registration import TokenLookupResult -from synapse.types import Requester, UserID, create_requester +from synapse.types import Requester, create_requester if TYPE_CHECKING: from synapse.server import HomeServer @@ -70,14 +69,14 @@ class Auth: async def check_user_in_room( self, room_id: str, - user_id: str, + requester: Requester, allow_departed_users: bool = False, ) -> Tuple[str, Optional[str]]: """Check if the user is in the room, or was at some point. Args: room_id: The room to check. - user_id: The user to check. + requester: The user making the request, according to the access token. current_state: Optional map of the current state of the room. If provided then that map is used to check whether they are a @@ -94,6 +93,7 @@ class Auth: membership event ID of the user. 
""" + user_id = requester.user.to_string() ( membership, member_event_id, @@ -182,96 +182,69 @@ class Auth: access_token = self.get_access_token_from_request(request) - ( - user_id, - device_id, - app_service, - ) = await self._get_appservice_user_id_and_device_id(request) - if user_id and app_service: - if ip_addr and self._track_appservice_user_ips: - await self.store.insert_client_ip( - user_id=user_id, - access_token=access_token, - ip=ip_addr, - user_agent=user_agent, - device_id="dummy-device" - if device_id is None - else device_id, # stubbed - ) - - requester = create_requester( - user_id, app_service=app_service, device_id=device_id + # First check if it could be a request from an appservice + requester = await self._get_appservice_user(request) + if not requester: + # If not, it should be from a regular user + requester = await self.get_user_by_access_token( + access_token, allow_expired=allow_expired ) - request.requester = user_id - return requester - - user_info = await self.get_user_by_access_token( - access_token, allow_expired=allow_expired - ) - token_id = user_info.token_id - is_guest = user_info.is_guest - shadow_banned = user_info.shadow_banned - - # Deny the request if the user account has expired. - if not allow_expired: - if await self._account_validity_handler.is_user_expired( - user_info.user_id - ): - # Raise the error if either an account validity module has determined - # the account has expired, or the legacy account validity - # implementation is enabled and determined the account has expired - raise AuthError( - 403, - "User account has expired", - errcode=Codes.EXPIRED_ACCOUNT, - ) - - device_id = user_info.device_id - - if access_token and ip_addr: + # Deny the request if the user account has expired. + # This check is only done for regular users, not appservice ones. 
+ if not allow_expired: + if await self._account_validity_handler.is_user_expired( + requester.user.to_string() + ): + # Raise the error if either an account validity module has determined + # the account has expired, or the legacy account validity + # implementation is enabled and determined the account has expired + raise AuthError( + 403, + "User account has expired", + errcode=Codes.EXPIRED_ACCOUNT, + ) + + if ip_addr and ( + not requester.app_service or self._track_appservice_user_ips + ): + # XXX(quenting): I'm 95% confident that we could skip setting the + # device_id to "dummy-device" for appservices, and that the only impact + # would be some rows which whould not deduplicate in the 'user_ips' + # table during the transition + recorded_device_id = ( + "dummy-device" + if requester.device_id is None and requester.app_service is not None + else requester.device_id + ) await self.store.insert_client_ip( - user_id=user_info.token_owner, + user_id=requester.authenticated_entity, access_token=access_token, ip=ip_addr, user_agent=user_agent, - device_id=device_id, + device_id=recorded_device_id, ) + # Track also the puppeted user client IP if enabled and the user is puppeting if ( - user_info.user_id != user_info.token_owner + requester.user.to_string() != requester.authenticated_entity and self._track_puppeted_user_ips ): await self.store.insert_client_ip( - user_id=user_info.user_id, + user_id=requester.user.to_string(), access_token=access_token, ip=ip_addr, user_agent=user_agent, - device_id=device_id, + device_id=requester.device_id, ) - if is_guest and not allow_guest: + if requester.is_guest and not allow_guest: raise AuthError( 403, "Guest access not allowed", errcode=Codes.GUEST_ACCESS_FORBIDDEN, ) - # Mark the token as used. This is used to invalidate old refresh - # tokens after some time. - if not user_info.token_used and token_id is not None: - await self.store.mark_access_token_as_used(token_id) - - requester = create_requester( - user_info.user_id, - token_id, - is_guest, - shadow_banned, - device_id, - app_service=app_service, - authenticated_entity=user_info.token_owner, - ) - request.requester = requester return requester except KeyError: @@ -308,9 +281,7 @@ class Auth: 403, "Application service has not registered this user (%s)" % user_id ) - async def _get_appservice_user_id_and_device_id( - self, request: Request - ) -> Tuple[Optional[str], Optional[str], Optional[ApplicationService]]: + async def _get_appservice_user(self, request: Request) -> Optional[Requester]: """ Given a request, reads the request parameters to determine: - whether it's an application service that's making this request @@ -325,15 +296,13 @@ class Auth: Must use `org.matrix.msc3202.device_id` in place of `device_id` for now. Returns: - 3-tuple of - (user ID?, device ID?, application service?) + the application service `Requester` of that request Postconditions: - - If an application service is returned, so is a user ID - - A user ID is never returned without an application service - - A device ID is never returned without a user ID or an application service - - The returned application service, if present, is permitted to control the - returned user ID. + - The `app_service` field in the returned `Requester` is set + - The `user_id` field in the returned `Requester` is either the application + service sender or the controlled user set by the `user_id` URI parameter + - The returned application service is permitted to control the returned user ID. 
- The returned device ID, if present, has been checked to be a valid device ID for the returned user ID. """ @@ -343,12 +312,12 @@ class Auth: self.get_access_token_from_request(request) ) if app_service is None: - return None, None, None + return None if app_service.ip_range_whitelist: ip_address = IPAddress(request.getClientAddress().host) if ip_address not in app_service.ip_range_whitelist: - return None, None, None + return None # This will always be set by the time Twisted calls us. assert request.args is not None @@ -382,13 +351,15 @@ class Auth: Codes.EXCLUSIVE, ) - return effective_user_id, effective_device_id, app_service + return create_requester( + effective_user_id, app_service=app_service, device_id=effective_device_id + ) async def get_user_by_access_token( self, token: str, allow_expired: bool = False, - ) -> TokenLookupResult: + ) -> Requester: """Validate access token and get user_id from it Args: @@ -405,9 +376,9 @@ class Auth: # First look in the database to see if the access token is present # as an opaque token. - r = await self.store.get_user_by_access_token(token) - if r: - valid_until_ms = r.valid_until_ms + user_info = await self.store.get_user_by_access_token(token) + if user_info: + valid_until_ms = user_info.valid_until_ms if ( not allow_expired and valid_until_ms is not None @@ -419,7 +390,20 @@ class Auth: msg="Access token has expired", soft_logout=True ) - return r + # Mark the token as used. This is used to invalidate old refresh + # tokens after some time. + await self.store.mark_access_token_as_used(user_info.token_id) + + requester = create_requester( + user_id=user_info.user_id, + access_token_id=user_info.token_id, + is_guest=user_info.is_guest, + shadow_banned=user_info.shadow_banned, + device_id=user_info.device_id, + authenticated_entity=user_info.token_owner, + ) + + return requester # If the token isn't found in the database, then it could still be a # macaroon for a guest, so we check that here. @@ -445,11 +429,12 @@ class Auth: "Guest access token used for regular user" ) - return TokenLookupResult( + return create_requester( user_id=user_id, is_guest=True, # all guests get the same device id device_id=GUEST_DEVICE_ID, + authenticated_entity=user_id, ) except ( pymacaroons.exceptions.MacaroonException, @@ -472,32 +457,33 @@ class Auth: request.requester = create_requester(service.sender, app_service=service) return service - async def is_server_admin(self, user: UserID) -> bool: + async def is_server_admin(self, requester: Requester) -> bool: """Check if the given user is a local server admin. Args: - user: user to check + requester: The user making the request, according to the access token. Returns: True if the user is an admin """ - return await self.store.is_server_admin(user) + return await self.store.is_server_admin(requester.user) - async def check_can_change_room_list(self, room_id: str, user: UserID) -> bool: + async def check_can_change_room_list( + self, room_id: str, requester: Requester + ) -> bool: """Determine whether the user is allowed to edit the room's entry in the published room list. Args: - room_id - user + room_id: The room to check. + requester: The user making the request, according to the access token. """ - is_admin = await self.is_server_admin(user) + is_admin = await self.is_server_admin(requester) if is_admin: return True - user_id = user.to_string() - await self.check_user_in_room(room_id, user_id) + await self.check_user_in_room(room_id, requester) # We currently require the user is a "moderator" in the room. 
We do this # by checking if they would (theoretically) be able to change the @@ -516,7 +502,9 @@ class Auth: send_level = event_auth.get_send_level( EventTypes.CanonicalAlias, "", power_level_event ) - user_level = event_auth.get_user_power_level(user_id, auth_events) + user_level = event_auth.get_user_power_level( + requester.user.to_string(), auth_events + ) return user_level >= send_level @@ -574,16 +562,16 @@ class Auth: @trace async def check_user_in_room_or_world_readable( - self, room_id: str, user_id: str, allow_departed_users: bool = False + self, room_id: str, requester: Requester, allow_departed_users: bool = False ) -> Tuple[str, Optional[str]]: """Checks that the user is or was in the room or the room is world readable. If it isn't then an exception is raised. Args: - room_id: room to check - user_id: user to check - allow_departed_users: if True, accept users that were previously - members but have now departed + room_id: The room to check. + requester: The user making the request, according to the access token. + allow_departed_users: If True, accept users that were previously + members but have now departed. Returns: Resolves to the current membership of the user in the room and the @@ -598,7 +586,7 @@ class Auth: # * The user is a guest user, and has joined the room # else it will throw. return await self.check_user_in_room( - room_id, user_id, allow_departed_users=allow_departed_users + room_id, requester, allow_departed_users=allow_departed_users ) except AuthError: visibility = await self._storage_controllers.state.get_current_state_event( @@ -613,6 +601,6 @@ class Auth: raise UnstableSpecAuthError( 403, "User %s not in room %s, and room previews are disabled" - % (user_id, room_id), + % (requester.user, room_id), errcode=Codes.NOT_JOINED, ) diff --git a/synapse/handlers/auth.py b/synapse/handlers/auth.py index bfa5535044..0327fc57a4 100644 --- a/synapse/handlers/auth.py +++ b/synapse/handlers/auth.py @@ -280,7 +280,7 @@ class AuthHandler: that it isn't stolen by re-authenticating them. Args: - requester: The user, as given by the access token + requester: The user making the request, according to the access token. request: The request sent by the client. 
@@ -1435,20 +1435,25 @@ class AuthHandler: access_token: access token to be deleted """ - user_info = await self.auth.get_user_by_access_token(access_token) + token = await self.store.get_user_by_access_token(access_token) + if not token: + # At this point, the token should already have been fetched once by + # the caller, so this should not happen, unless of a race condition + # between two delete requests + raise SynapseError(HTTPStatus.UNAUTHORIZED, "Unrecognised access token") await self.store.delete_access_token(access_token) # see if any modules want to know about this await self.password_auth_provider.on_logged_out( - user_id=user_info.user_id, - device_id=user_info.device_id, + user_id=token.user_id, + device_id=token.device_id, access_token=access_token, ) # delete pushers associated with this access token - if user_info.token_id is not None: + if token.token_id is not None: await self.hs.get_pusherpool().remove_pushers_by_access_token( - user_info.user_id, (user_info.token_id,) + token.user_id, (token.token_id,) ) async def delete_access_tokens_for_user( diff --git a/synapse/handlers/directory.py b/synapse/handlers/directory.py index 09a7a4b238..948f66a94d 100644 --- a/synapse/handlers/directory.py +++ b/synapse/handlers/directory.py @@ -30,7 +30,7 @@ from synapse.api.errors import ( from synapse.appservice import ApplicationService from synapse.module_api import NOT_SPAM from synapse.storage.databases.main.directory import RoomAliasMapping -from synapse.types import JsonDict, Requester, RoomAlias, UserID, get_domain_from_id +from synapse.types import JsonDict, Requester, RoomAlias, get_domain_from_id if TYPE_CHECKING: from synapse.server import HomeServer @@ -133,7 +133,7 @@ class DirectoryHandler: else: # Server admins are not subject to the same constraints as normal # users when creating an alias (e.g. being in the room). - is_admin = await self.auth.is_server_admin(requester.user) + is_admin = await self.auth.is_server_admin(requester) if (self.require_membership and check_membership) and not is_admin: rooms_for_user = await self.store.get_rooms_for_user(user_id) @@ -197,7 +197,7 @@ class DirectoryHandler: user_id = requester.user.to_string() try: - can_delete = await self._user_can_delete_alias(room_alias, user_id) + can_delete = await self._user_can_delete_alias(room_alias, requester) except StoreError as e: if e.code == 404: raise NotFoundError("Unknown room alias") @@ -400,7 +400,9 @@ class DirectoryHandler: # either no interested services, or no service with an exclusive lock return True - async def _user_can_delete_alias(self, alias: RoomAlias, user_id: str) -> bool: + async def _user_can_delete_alias( + self, alias: RoomAlias, requester: Requester + ) -> bool: """Determine whether a user can delete an alias. One of the following must be true: @@ -413,7 +415,7 @@ class DirectoryHandler: """ creator = await self.store.get_room_alias_creator(alias.to_string()) - if creator == user_id: + if creator == requester.user.to_string(): return True # Resolve the alias to the corresponding room. 
@@ -422,9 +424,7 @@ class DirectoryHandler: if not room_id: return False - return await self.auth.check_can_change_room_list( - room_id, UserID.from_string(user_id) - ) + return await self.auth.check_can_change_room_list(room_id, requester) async def edit_published_room_list( self, requester: Requester, room_id: str, visibility: str @@ -463,7 +463,7 @@ class DirectoryHandler: raise SynapseError(400, "Unknown room") can_change_room_list = await self.auth.check_can_change_room_list( - room_id, requester.user + room_id, requester ) if not can_change_room_list: raise AuthError( @@ -528,10 +528,8 @@ class DirectoryHandler: Get a list of the aliases that currently point to this room on this server """ # allow access to server admins and current members of the room - is_admin = await self.auth.is_server_admin(requester.user) + is_admin = await self.auth.is_server_admin(requester) if not is_admin: - await self.auth.check_user_in_room_or_world_readable( - room_id, requester.user.to_string() - ) + await self.auth.check_user_in_room_or_world_readable(room_id, requester) return await self.store.get_aliases_for_room(room_id) diff --git a/synapse/handlers/initial_sync.py b/synapse/handlers/initial_sync.py index 6484e47e5f..860c82c110 100644 --- a/synapse/handlers/initial_sync.py +++ b/synapse/handlers/initial_sync.py @@ -309,18 +309,18 @@ class InitialSyncHandler: if blocked: raise SynapseError(403, "This room has been blocked on this server") - user_id = requester.user.to_string() - ( membership, member_event_id, ) = await self.auth.check_user_in_room_or_world_readable( room_id, - user_id, + requester, allow_departed_users=True, ) is_peeking = member_event_id is None + user_id = requester.user.to_string() + if membership == Membership.JOIN: result = await self._room_initial_sync_joined( user_id, room_id, pagin_config, membership, is_peeking diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index 8f29ee9a87..acd3de06f6 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -104,7 +104,7 @@ class MessageHandler: async def get_room_data( self, - user_id: str, + requester: Requester, room_id: str, event_type: str, state_key: str, @@ -112,7 +112,7 @@ class MessageHandler: """Get data from a room. Args: - user_id + requester: The user who did the request. room_id event_type state_key @@ -125,7 +125,7 @@ class MessageHandler: membership, membership_event_id, ) = await self.auth.check_user_in_room_or_world_readable( - room_id, user_id, allow_departed_users=True + room_id, requester, allow_departed_users=True ) if membership == Membership.JOIN: @@ -161,11 +161,10 @@ class MessageHandler: async def get_state_events( self, - user_id: str, + requester: Requester, room_id: str, state_filter: Optional[StateFilter] = None, at_token: Optional[StreamToken] = None, - is_guest: bool = False, ) -> List[dict]: """Retrieve all state events for a given room. If the user is joined to the room then return the current state. If the user has @@ -174,14 +173,13 @@ class MessageHandler: visible. Args: - user_id: The user requesting state events. + requester: The user requesting state events. room_id: The room ID to get all state events from. state_filter: The state filter used to fetch state from the database. at_token: the stream token of the at which we are requesting the stats. If the user is not allowed to view the state as of that stream token, we raise a 403 SynapseError. If None, returns the current state based on the current_state_events table. 
- is_guest: whether this user is a guest Returns: A list of dicts representing state events. [{}, {}, {}] Raises: @@ -191,6 +189,7 @@ class MessageHandler: members of this room. """ state_filter = state_filter or StateFilter.all() + user_id = requester.user.to_string() if at_token: last_event_id = ( @@ -223,7 +222,7 @@ class MessageHandler: membership, membership_event_id, ) = await self.auth.check_user_in_room_or_world_readable( - room_id, user_id, allow_departed_users=True + room_id, requester, allow_departed_users=True ) if membership == Membership.JOIN: @@ -317,12 +316,11 @@ class MessageHandler: Returns: A dict of user_id to profile info """ - user_id = requester.user.to_string() if not requester.app_service: # We check AS auth after fetching the room membership, as it # requires us to pull out all joined members anyway. membership, _ = await self.auth.check_user_in_room_or_world_readable( - room_id, user_id, allow_departed_users=True + room_id, requester, allow_departed_users=True ) if membership != Membership.JOIN: raise SynapseError( @@ -340,7 +338,10 @@ class MessageHandler: # If this is an AS, double check that they are allowed to see the members. # This can either be because the AS user is in the room or because there # is a user in the room that the AS is "interested in" - if requester.app_service and user_id not in users_with_profile: + if ( + requester.app_service + and requester.user.to_string() not in users_with_profile + ): for uid in users_with_profile: if requester.app_service.is_interested_in_user(uid): break diff --git a/synapse/handlers/pagination.py b/synapse/handlers/pagination.py index e1e34e3b16..74e944bce7 100644 --- a/synapse/handlers/pagination.py +++ b/synapse/handlers/pagination.py @@ -464,7 +464,7 @@ class PaginationHandler: membership, member_event_id, ) = await self.auth.check_user_in_room_or_world_readable( - room_id, user_id, allow_departed_users=True + room_id, requester, allow_departed_users=True ) if pagin_config.direction == "b": diff --git a/synapse/handlers/register.py b/synapse/handlers/register.py index c77d181722..20ec22105a 100644 --- a/synapse/handlers/register.py +++ b/synapse/handlers/register.py @@ -29,7 +29,13 @@ from synapse.api.constants import ( JoinRules, LoginType, ) -from synapse.api.errors import AuthError, Codes, ConsentNotGivenError, SynapseError +from synapse.api.errors import ( + AuthError, + Codes, + ConsentNotGivenError, + InvalidClientTokenError, + SynapseError, +) from synapse.appservice import ApplicationService from synapse.config.server import is_threepid_reserved from synapse.http.servlet import assert_params_in_dict @@ -180,10 +186,7 @@ class RegistrationHandler: ) if guest_access_token: user_data = await self.auth.get_user_by_access_token(guest_access_token) - if ( - not user_data.is_guest - or UserID.from_string(user_data.user_id).localpart != localpart - ): + if not user_data.is_guest or user_data.user.localpart != localpart: raise AuthError( 403, "Cannot register taken user ID without valid guest " @@ -618,7 +621,7 @@ class RegistrationHandler: user_id = user.to_string() service = self.store.get_app_service_by_token(as_token) if not service: - raise AuthError(403, "Invalid application service token.") + raise InvalidClientTokenError() if not service.is_interested_in_user(user_id): raise SynapseError( 400, diff --git a/synapse/handlers/relations.py b/synapse/handlers/relations.py index 72d25df8c8..28d7093f08 100644 --- a/synapse/handlers/relations.py +++ b/synapse/handlers/relations.py @@ -103,7 +103,7 @@ class 
RelationsHandler: # TODO Properly handle a user leaving a room. (_, member_event_id) = await self._auth.check_user_in_room_or_world_readable( - room_id, user_id, allow_departed_users=True + room_id, requester, allow_departed_users=True ) # This gets the original event and checks that a) the event exists and diff --git a/synapse/handlers/room.py b/synapse/handlers/room.py index 55395457c3..2bf0ebd025 100644 --- a/synapse/handlers/room.py +++ b/synapse/handlers/room.py @@ -721,7 +721,7 @@ class RoomCreationHandler: # allow the server notices mxid to create rooms is_requester_admin = True else: - is_requester_admin = await self.auth.is_server_admin(requester.user) + is_requester_admin = await self.auth.is_server_admin(requester) # Let the third party rules modify the room creation config if needed, or abort # the room creation entirely with an exception. @@ -1279,7 +1279,7 @@ class RoomContextHandler: """ user = requester.user if use_admin_priviledge: - await assert_user_is_admin(self.auth, requester.user) + await assert_user_is_admin(self.auth, requester) before_limit = math.floor(limit / 2.0) after_limit = limit - before_limit diff --git a/synapse/handlers/room_member.py b/synapse/handlers/room_member.py index 70dc69c809..d1909665d6 100644 --- a/synapse/handlers/room_member.py +++ b/synapse/handlers/room_member.py @@ -179,7 +179,7 @@ class RoomMemberHandler(metaclass=abc.ABCMeta): """Try and join a room that this server is not in Args: - requester + requester: The user making the request, according to the access token. remote_room_hosts: List of servers that can be used to join via. room_id: Room that we are trying to join user: User who is trying to join @@ -744,7 +744,7 @@ class RoomMemberHandler(metaclass=abc.ABCMeta): is_requester_admin = True else: - is_requester_admin = await self.auth.is_server_admin(requester.user) + is_requester_admin = await self.auth.is_server_admin(requester) if not is_requester_admin: if self.config.server.block_non_admin_invites: @@ -868,7 +868,7 @@ class RoomMemberHandler(metaclass=abc.ABCMeta): bypass_spam_checker = True else: - bypass_spam_checker = await self.auth.is_server_admin(requester.user) + bypass_spam_checker = await self.auth.is_server_admin(requester) inviter = await self._get_inviter(target.to_string(), room_id) if ( @@ -1410,7 +1410,7 @@ class RoomMemberHandler(metaclass=abc.ABCMeta): ShadowBanError if the requester has been shadow-banned. 
""" if self.config.server.block_non_admin_invites: - is_requester_admin = await self.auth.is_server_admin(requester.user) + is_requester_admin = await self.auth.is_server_admin(requester) if not is_requester_admin: raise SynapseError( 403, "Invites have been disabled on this server", Codes.FORBIDDEN @@ -1693,7 +1693,7 @@ class RoomMemberMasterHandler(RoomMemberHandler): check_complexity and self.hs.config.server.limit_remote_rooms.admins_can_join ): - check_complexity = not await self.auth.is_server_admin(user) + check_complexity = not await self.store.is_server_admin(user) if check_complexity: # Fetch the room complexity diff --git a/synapse/handlers/typing.py b/synapse/handlers/typing.py index 27aa0d3126..bcac3372a2 100644 --- a/synapse/handlers/typing.py +++ b/synapse/handlers/typing.py @@ -253,12 +253,11 @@ class TypingWriterHandler(FollowerTypingHandler): self, target_user: UserID, requester: Requester, room_id: str, timeout: int ) -> None: target_user_id = target_user.to_string() - auth_user_id = requester.user.to_string() if not self.is_mine_id(target_user_id): raise SynapseError(400, "User is not hosted on this homeserver") - if target_user_id != auth_user_id: + if target_user != requester.user: raise AuthError(400, "Cannot set another user's typing state") if requester.shadow_banned: @@ -266,7 +265,7 @@ class TypingWriterHandler(FollowerTypingHandler): await self.clock.sleep(random.randint(1, 10)) raise ShadowBanError() - await self.auth.check_user_in_room(room_id, target_user_id) + await self.auth.check_user_in_room(room_id, requester) logger.debug("%s has started typing in %s", target_user_id, room_id) @@ -289,12 +288,11 @@ class TypingWriterHandler(FollowerTypingHandler): self, target_user: UserID, requester: Requester, room_id: str ) -> None: target_user_id = target_user.to_string() - auth_user_id = requester.user.to_string() if not self.is_mine_id(target_user_id): raise SynapseError(400, "User is not hosted on this homeserver") - if target_user_id != auth_user_id: + if target_user != requester.user: raise AuthError(400, "Cannot set another user's typing state") if requester.shadow_banned: @@ -302,7 +300,7 @@ class TypingWriterHandler(FollowerTypingHandler): await self.clock.sleep(random.randint(1, 10)) raise ShadowBanError() - await self.auth.check_user_in_room(room_id, target_user_id) + await self.auth.check_user_in_room(room_id, requester) logger.debug("%s has stopped typing in %s", target_user_id, room_id) diff --git a/synapse/http/site.py b/synapse/http/site.py index eeec74b78a..1155f3f610 100644 --- a/synapse/http/site.py +++ b/synapse/http/site.py @@ -226,7 +226,7 @@ class SynapseRequest(Request): # If this is a request where the target user doesn't match the user who # authenticated (e.g. and admin is puppetting a user) then we return both. 
- if self._requester.user.to_string() != authenticated_entity: + if requester != authenticated_entity: return requester, authenticated_entity return requester, None diff --git a/synapse/rest/admin/_base.py b/synapse/rest/admin/_base.py index 399b205aaf..b467a61dfb 100644 --- a/synapse/rest/admin/_base.py +++ b/synapse/rest/admin/_base.py @@ -19,7 +19,7 @@ from typing import Iterable, Pattern from synapse.api.auth import Auth from synapse.api.errors import AuthError from synapse.http.site import SynapseRequest -from synapse.types import UserID +from synapse.types import Requester def admin_patterns(path_regex: str, version: str = "v1") -> Iterable[Pattern]: @@ -48,19 +48,19 @@ async def assert_requester_is_admin(auth: Auth, request: SynapseRequest) -> None AuthError if the requester is not a server admin """ requester = await auth.get_user_by_req(request) - await assert_user_is_admin(auth, requester.user) + await assert_user_is_admin(auth, requester) -async def assert_user_is_admin(auth: Auth, user_id: UserID) -> None: +async def assert_user_is_admin(auth: Auth, requester: Requester) -> None: """Verify that the given user is an admin user Args: auth: Auth singleton - user_id: user to check + requester: The user making the request, according to the access token. Raises: AuthError if the user is not a server admin """ - is_admin = await auth.is_server_admin(user_id) + is_admin = await auth.is_server_admin(requester) if not is_admin: raise AuthError(HTTPStatus.FORBIDDEN, "You are not a server admin") diff --git a/synapse/rest/admin/media.py b/synapse/rest/admin/media.py index 19d4a008e8..73470f09ae 100644 --- a/synapse/rest/admin/media.py +++ b/synapse/rest/admin/media.py @@ -54,7 +54,7 @@ class QuarantineMediaInRoom(RestServlet): self, request: SynapseRequest, room_id: str ) -> Tuple[int, JsonDict]: requester = await self.auth.get_user_by_req(request) - await assert_user_is_admin(self.auth, requester.user) + await assert_user_is_admin(self.auth, requester) logging.info("Quarantining room: %s", room_id) @@ -81,7 +81,7 @@ class QuarantineMediaByUser(RestServlet): self, request: SynapseRequest, user_id: str ) -> Tuple[int, JsonDict]: requester = await self.auth.get_user_by_req(request) - await assert_user_is_admin(self.auth, requester.user) + await assert_user_is_admin(self.auth, requester) logging.info("Quarantining media by user: %s", user_id) @@ -110,7 +110,7 @@ class QuarantineMediaByID(RestServlet): self, request: SynapseRequest, server_name: str, media_id: str ) -> Tuple[int, JsonDict]: requester = await self.auth.get_user_by_req(request) - await assert_user_is_admin(self.auth, requester.user) + await assert_user_is_admin(self.auth, requester) logging.info("Quarantining media by ID: %s/%s", server_name, media_id) diff --git a/synapse/rest/admin/rooms.py b/synapse/rest/admin/rooms.py index 68054ffc28..3d870629c4 100644 --- a/synapse/rest/admin/rooms.py +++ b/synapse/rest/admin/rooms.py @@ -75,7 +75,7 @@ class RoomRestV2Servlet(RestServlet): ) -> Tuple[int, JsonDict]: requester = await self._auth.get_user_by_req(request) - await assert_user_is_admin(self._auth, requester.user) + await assert_user_is_admin(self._auth, requester) content = parse_json_object_from_request(request) @@ -327,7 +327,7 @@ class RoomRestServlet(RestServlet): pagination_handler: "PaginationHandler", ) -> Tuple[int, JsonDict]: requester = await auth.get_user_by_req(request) - await assert_user_is_admin(auth, requester.user) + await assert_user_is_admin(auth, requester) content = parse_json_object_from_request(request) 
@@ -461,7 +461,7 @@ class JoinRoomAliasServlet(ResolveRoomIdMixin, RestServlet): assert request.args is not None requester = await self.auth.get_user_by_req(request) - await assert_user_is_admin(self.auth, requester.user) + await assert_user_is_admin(self.auth, requester) content = parse_json_object_from_request(request) @@ -551,7 +551,7 @@ class MakeRoomAdminRestServlet(ResolveRoomIdMixin, RestServlet): self, request: SynapseRequest, room_identifier: str ) -> Tuple[int, JsonDict]: requester = await self.auth.get_user_by_req(request) - await assert_user_is_admin(self.auth, requester.user) + await assert_user_is_admin(self.auth, requester) content = parse_json_object_from_request(request, allow_empty_body=True) room_id, _ = await self.resolve_room_id(room_identifier) @@ -742,7 +742,7 @@ class RoomEventContextServlet(RestServlet): self, request: SynapseRequest, room_id: str, event_id: str ) -> Tuple[int, JsonDict]: requester = await self.auth.get_user_by_req(request, allow_guest=False) - await assert_user_is_admin(self.auth, requester.user) + await assert_user_is_admin(self.auth, requester) limit = parse_integer(request, "limit", default=10) @@ -834,7 +834,7 @@ class BlockRoomRestServlet(RestServlet): self, request: SynapseRequest, room_id: str ) -> Tuple[int, JsonDict]: requester = await self._auth.get_user_by_req(request) - await assert_user_is_admin(self._auth, requester.user) + await assert_user_is_admin(self._auth, requester) content = parse_json_object_from_request(request) diff --git a/synapse/rest/admin/users.py b/synapse/rest/admin/users.py index ba2f7fa6d8..78ee9b6532 100644 --- a/synapse/rest/admin/users.py +++ b/synapse/rest/admin/users.py @@ -183,7 +183,7 @@ class UserRestServletV2(RestServlet): self, request: SynapseRequest, user_id: str ) -> Tuple[int, JsonDict]: requester = await self.auth.get_user_by_req(request) - await assert_user_is_admin(self.auth, requester.user) + await assert_user_is_admin(self.auth, requester) target_user = UserID.from_string(user_id) body = parse_json_object_from_request(request) @@ -575,10 +575,9 @@ class WhoisRestServlet(RestServlet): ) -> Tuple[int, JsonDict]: target_user = UserID.from_string(user_id) requester = await self.auth.get_user_by_req(request) - auth_user = requester.user - if target_user != auth_user: - await assert_user_is_admin(self.auth, auth_user) + if target_user != requester.user: + await assert_user_is_admin(self.auth, requester) if not self.is_mine(target_user): raise SynapseError(HTTPStatus.BAD_REQUEST, "Can only whois a local user") @@ -601,7 +600,7 @@ class DeactivateAccountRestServlet(RestServlet): self, request: SynapseRequest, target_user_id: str ) -> Tuple[int, JsonDict]: requester = await self.auth.get_user_by_req(request) - await assert_user_is_admin(self.auth, requester.user) + await assert_user_is_admin(self.auth, requester) if not self.is_mine(UserID.from_string(target_user_id)): raise SynapseError( @@ -693,7 +692,7 @@ class ResetPasswordRestServlet(RestServlet): This needs user to have administrator access in Synapse. 
""" requester = await self.auth.get_user_by_req(request) - await assert_user_is_admin(self.auth, requester.user) + await assert_user_is_admin(self.auth, requester) UserID.from_string(target_user_id) @@ -807,7 +806,7 @@ class UserAdminServlet(RestServlet): self, request: SynapseRequest, user_id: str ) -> Tuple[int, JsonDict]: requester = await self.auth.get_user_by_req(request) - await assert_user_is_admin(self.auth, requester.user) + await assert_user_is_admin(self.auth, requester) auth_user = requester.user target_user = UserID.from_string(user_id) @@ -921,7 +920,7 @@ class UserTokenRestServlet(RestServlet): self, request: SynapseRequest, user_id: str ) -> Tuple[int, JsonDict]: requester = await self.auth.get_user_by_req(request) - await assert_user_is_admin(self.auth, requester.user) + await assert_user_is_admin(self.auth, requester) auth_user = requester.user if not self.is_mine_id(user_id): diff --git a/synapse/rest/client/profile.py b/synapse/rest/client/profile.py index c16d707909..e69fa0829d 100644 --- a/synapse/rest/client/profile.py +++ b/synapse/rest/client/profile.py @@ -66,7 +66,7 @@ class ProfileDisplaynameRestServlet(RestServlet): ) -> Tuple[int, JsonDict]: requester = await self.auth.get_user_by_req(request, allow_guest=True) user = UserID.from_string(user_id) - is_admin = await self.auth.is_server_admin(requester.user) + is_admin = await self.auth.is_server_admin(requester) content = parse_json_object_from_request(request) @@ -123,7 +123,7 @@ class ProfileAvatarURLRestServlet(RestServlet): ) -> Tuple[int, JsonDict]: requester = await self.auth.get_user_by_req(request) user = UserID.from_string(user_id) - is_admin = await self.auth.is_server_admin(requester.user) + is_admin = await self.auth.is_server_admin(requester) content = parse_json_object_from_request(request) try: diff --git a/synapse/rest/client/register.py b/synapse/rest/client/register.py index 956c45e60a..1b953d3fa0 100644 --- a/synapse/rest/client/register.py +++ b/synapse/rest/client/register.py @@ -484,9 +484,6 @@ class RegisterRestServlet(RestServlet): "Appservice token must be provided when using a type of m.login.application_service", ) - # Verify the AS - self.auth.get_appservice_by_req(request) - # Set the desired user according to the AS API (which uses the # 'user' key not 'username'). Since this is a new addition, we'll # fallback to 'username' if they gave one. 
diff --git a/synapse/rest/client/room.py b/synapse/rest/client/room.py index 13bc9482c5..0eafbae457 100644 --- a/synapse/rest/client/room.py +++ b/synapse/rest/client/room.py @@ -229,7 +229,7 @@ class RoomStateEventRestServlet(TransactionRestServlet): msg_handler = self.message_handler data = await msg_handler.get_room_data( - user_id=requester.user.to_string(), + requester=requester, room_id=room_id, event_type=event_type, state_key=state_key, @@ -574,7 +574,7 @@ class RoomMemberListRestServlet(RestServlet): events = await handler.get_state_events( room_id=room_id, - user_id=requester.user.to_string(), + requester=requester, at_token=at_token, state_filter=StateFilter.from_types([(EventTypes.Member, None)]), ) @@ -696,8 +696,7 @@ class RoomStateRestServlet(RestServlet): # Get all the current state for this room events = await self.message_handler.get_state_events( room_id=room_id, - user_id=requester.user.to_string(), - is_guest=requester.is_guest, + requester=requester, ) return 200, events @@ -755,7 +754,7 @@ class RoomEventServlet(RestServlet): == "true" ) if include_unredacted_content and not await self.auth.is_server_admin( - requester.user + requester ): power_level_event = ( await self._storage_controllers.state.get_current_state_event( @@ -1260,9 +1259,7 @@ class TimestampLookupRestServlet(RestServlet): self, request: SynapseRequest, room_id: str ) -> Tuple[int, JsonDict]: requester = await self._auth.get_user_by_req(request) - await self._auth.check_user_in_room_or_world_readable( - room_id, requester.user.to_string() - ) + await self._auth.check_user_in_room_or_world_readable(room_id, requester) timestamp = parse_integer(request, "ts", required=True) direction = parse_string(request, "dir", default="f", allowed_values=["f", "b"]) diff --git a/synapse/server_notices/server_notices_manager.py b/synapse/server_notices/server_notices_manager.py index 8ecab86ec7..70d054a8f4 100644 --- a/synapse/server_notices/server_notices_manager.py +++ b/synapse/server_notices/server_notices_manager.py @@ -244,7 +244,7 @@ class ServerNoticesManager: assert self.server_notices_mxid is not None notice_user_data_in_room = await self._message_handler.get_room_data( - self.server_notices_mxid, + create_requester(self.server_notices_mxid), room_id, EventTypes.Member, self.server_notices_mxid, diff --git a/synapse/storage/databases/main/registration.py b/synapse/storage/databases/main/registration.py index cb63cd9b7d..7fb9c801da 100644 --- a/synapse/storage/databases/main/registration.py +++ b/synapse/storage/databases/main/registration.py @@ -69,9 +69,9 @@ class TokenLookupResult: """ user_id: str + token_id: int is_guest: bool = False shadow_banned: bool = False - token_id: Optional[int] = None device_id: Optional[str] = None valid_until_ms: Optional[int] = None token_owner: str = attr.ib() diff --git a/tests/api/test_auth.py b/tests/api/test_auth.py index dfcfaf79b6..e0f363555b 100644 --- a/tests/api/test_auth.py +++ b/tests/api/test_auth.py @@ -284,10 +284,13 @@ class AuthTestCase(unittest.HomeserverTestCase): TokenLookupResult( user_id="@baldrick:matrix.org", device_id="device", + token_id=5, token_owner="@admin:matrix.org", + token_used=True, ) ) self.store.insert_client_ip = simple_async_mock(None) + self.store.mark_access_token_as_used = simple_async_mock(None) request = Mock(args={}) request.getClientAddress.return_value.host = "127.0.0.1" request.args[b"access_token"] = [self.test_token] @@ -301,10 +304,13 @@ class AuthTestCase(unittest.HomeserverTestCase): TokenLookupResult( 
user_id="@baldrick:matrix.org", device_id="device", + token_id=5, token_owner="@admin:matrix.org", + token_used=True, ) ) self.store.insert_client_ip = simple_async_mock(None) + self.store.mark_access_token_as_used = simple_async_mock(None) request = Mock(args={}) request.getClientAddress.return_value.host = "127.0.0.1" request.args[b"access_token"] = [self.test_token] @@ -347,7 +353,7 @@ class AuthTestCase(unittest.HomeserverTestCase): serialized = macaroon.serialize() user_info = self.get_success(self.auth.get_user_by_access_token(serialized)) - self.assertEqual(user_id, user_info.user_id) + self.assertEqual(user_id, user_info.user.to_string()) self.assertTrue(user_info.is_guest) self.store.get_user_by_id.assert_called_with(user_id) diff --git a/tests/handlers/test_typing.py b/tests/handlers/test_typing.py index 7af1333126..8adba29d7f 100644 --- a/tests/handlers/test_typing.py +++ b/tests/handlers/test_typing.py @@ -25,7 +25,7 @@ from synapse.api.constants import EduTypes from synapse.api.errors import AuthError from synapse.federation.transport.server import TransportLayerServer from synapse.server import HomeServer -from synapse.types import JsonDict, UserID, create_requester +from synapse.types import JsonDict, Requester, UserID, create_requester from synapse.util import Clock from tests import unittest @@ -117,8 +117,10 @@ class TypingNotificationsTestCase(unittest.HomeserverTestCase): self.room_members = [] - async def check_user_in_room(room_id: str, user_id: str) -> None: - if user_id not in [u.to_string() for u in self.room_members]: + async def check_user_in_room(room_id: str, requester: Requester) -> None: + if requester.user.to_string() not in [ + u.to_string() for u in self.room_members + ]: raise AuthError(401, "User is not in the room") return None diff --git a/tests/rest/client/test_retention.py b/tests/rest/client/test_retention.py index ac9c113354..9c8c1889d3 100644 --- a/tests/rest/client/test_retention.py +++ b/tests/rest/client/test_retention.py @@ -20,7 +20,7 @@ from synapse.api.constants import EventTypes from synapse.rest import admin from synapse.rest.client import login, room from synapse.server import HomeServer -from synapse.types import JsonDict +from synapse.types import JsonDict, create_requester from synapse.util import Clock from synapse.visibility import filter_events_for_client @@ -188,7 +188,7 @@ class RetentionTestCase(unittest.HomeserverTestCase): message_handler = self.hs.get_message_handler() create_event = self.get_success( message_handler.get_room_data( - self.user_id, room_id, EventTypes.Create, state_key="" + create_requester(self.user_id), room_id, EventTypes.Create, state_key="" ) ) diff --git a/tests/rest/client/test_shadow_banned.py b/tests/rest/client/test_shadow_banned.py index d9bd8c4a28..c50f034b34 100644 --- a/tests/rest/client/test_shadow_banned.py +++ b/tests/rest/client/test_shadow_banned.py @@ -26,7 +26,7 @@ from synapse.rest.client import ( room_upgrade_rest_servlet, ) from synapse.server import HomeServer -from synapse.types import UserID +from synapse.types import UserID, create_requester from synapse.util import Clock from tests import unittest @@ -275,7 +275,7 @@ class ProfileTestCase(_ShadowBannedBase): message_handler = self.hs.get_message_handler() event = self.get_success( message_handler.get_room_data( - self.banned_user_id, + create_requester(self.banned_user_id), room_id, "m.room.member", self.banned_user_id, @@ -310,7 +310,7 @@ class ProfileTestCase(_ShadowBannedBase): message_handler = self.hs.get_message_handler() 
event = self.get_success( message_handler.get_room_data( - self.banned_user_id, + create_requester(self.banned_user_id), room_id, "m.room.member", self.banned_user_id, -- cgit 1.5.1 From 9385c41ba4fd9cbc86d074ff8fa69e2ae437eb88 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 23 Aug 2022 02:47:30 -0500 Subject: Fix Prometheus metrics being negative (mixed up start/end) (#13584) Fix: - https://github.com/matrix-org/synapse/pull/13535#discussion_r949582508 - https://github.com/matrix-org/synapse/pull/13533#discussion_r949577244 --- changelog.d/13584.misc | 1 + synapse/handlers/federation.py | 7 ++++++- synapse/handlers/federation_event.py | 10 ++++++++++ synapse/rest/client/room.py | 6 +++++- 4 files changed, 22 insertions(+), 2 deletions(-) create mode 100644 changelog.d/13584.misc (limited to 'synapse') diff --git a/changelog.d/13584.misc b/changelog.d/13584.misc new file mode 100644 index 0000000000..6b190181c8 --- /dev/null +++ b/changelog.d/13584.misc @@ -0,0 +1 @@ +Add metrics to time how long it takes us to do backfill processing (`synapse_federation_backfill_processing_before_time_seconds`, `synapse_federation_backfill_processing_after_time_seconds`). diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index a09eaa4379..e151962055 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -86,9 +86,14 @@ backfill_processing_before_timer = Histogram( "sec", [], buckets=( + 0.1, + 0.5, 1.0, + 2.5, 5.0, + 7.5, 10.0, + 15.0, 20.0, 30.0, 40.0, @@ -482,7 +487,7 @@ class FederationHandler: processing_end_time = self.clock.time_msec() backfill_processing_before_timer.observe( - (processing_start_time - processing_end_time) / 1000 + (processing_end_time - processing_start_time) / 1000 ) success = await try_backfill(likely_domains) diff --git a/synapse/handlers/federation_event.py b/synapse/handlers/federation_event.py index 32326975a1..048c4111f6 100644 --- a/synapse/handlers/federation_event.py +++ b/synapse/handlers/federation_event.py @@ -104,15 +104,25 @@ backfill_processing_after_timer = Histogram( "sec", [], buckets=( + 0.1, + 0.25, + 0.5, 1.0, + 2.5, 5.0, + 7.5, 10.0, + 15.0, 20.0, + 25.0, 30.0, 40.0, + 50.0, 60.0, 80.0, + 100.0, 120.0, + 150.0, 180.0, "+Inf", ), diff --git a/synapse/rest/client/room.py b/synapse/rest/client/room.py index 0eafbae457..3259de4802 100644 --- a/synapse/rest/client/room.py +++ b/synapse/rest/client/room.py @@ -116,9 +116,13 @@ messsages_response_timer = Histogram( 2.5, 5.0, 10.0, + 20.0, 30.0, 60.0, + 80.0, + 100.0, 120.0, + 150.0, 180.0, "+Inf", ), @@ -674,7 +678,7 @@ class RoomMessageListRestServlet(RestServlet): room_member_count = await make_deferred_yieldable(room_member_count_deferred) messsages_response_timer.labels( room_size=_RoomSize.from_member_count(room_member_count) - ).observe((processing_start_time - processing_end_time) / 1000) + ).observe((processing_end_time - processing_start_time) / 1000) return 200, msgs -- cgit 1.5.1 From 37f329c9adf6ed02df15661850f999edd9e5fd93 Mon Sep 17 00:00:00 2001 From: Dirk Klimpel <5740567+dklimpel@users.noreply.github.com> Date: Tue, 23 Aug 2022 10:48:35 +0200 Subject: Fix that sending server notices fail if avatar is `None` (#13566) Introduced in #11846.
--- changelog.d/13566.bugfix | 1 + synapse/handlers/room_member.py | 2 +- tests/rest/admin/test_server_notice.py | 56 ++++++++++++++++++++++ .../test_resource_limits_server_notices.py | 9 ++-- 4 files changed, 64 insertions(+), 4 deletions(-) create mode 100644 changelog.d/13566.bugfix (limited to 'synapse') diff --git a/changelog.d/13566.bugfix b/changelog.d/13566.bugfix new file mode 100644 index 0000000000..6c44024add --- /dev/null +++ b/changelog.d/13566.bugfix @@ -0,0 +1 @@ +Fix a bug introduced in Synapse 1.52.0 where sending server notices fails if `max_avatar_size` or `allowed_avatar_mimetypes` is set and not `system_mxid_avatar_url`. \ No newline at end of file diff --git a/synapse/handlers/room_member.py b/synapse/handlers/room_member.py index d1909665d6..65b9a655d4 100644 --- a/synapse/handlers/room_member.py +++ b/synapse/handlers/room_member.py @@ -689,7 +689,7 @@ class RoomMemberHandler(metaclass=abc.ABCMeta): errcode=Codes.BAD_JSON, ) - if "avatar_url" in content: + if "avatar_url" in content and content.get("avatar_url") is not None: if not await self.profile_handler.check_avatar_size_and_mime_type( content["avatar_url"], ): diff --git a/tests/rest/admin/test_server_notice.py b/tests/rest/admin/test_server_notice.py index 81e125e27d..a2f347f666 100644 --- a/tests/rest/admin/test_server_notice.py +++ b/tests/rest/admin/test_server_notice.py @@ -159,6 +159,62 @@ class ServerNoticeTestCase(unittest.HomeserverTestCase): self.assertEqual(Codes.UNKNOWN, channel.json_body["errcode"]) self.assertEqual("'msgtype' not in content", channel.json_body["error"]) + @override_config( + { + "server_notices": { + "system_mxid_localpart": "notices", + "system_mxid_avatar_url": "somthingwrong", + }, + "max_avatar_size": "10M", + } + ) + def test_invalid_avatar_url(self) -> None: + """If avatar url in homeserver.yaml is invalid and + "check avatar size and mime type" is set, an error is returned. + TODO: Should be checked when reading the configuration.""" + channel = self.make_request( + "POST", + self.url, + access_token=self.admin_user_tok, + content={ + "user_id": self.other_user, + "content": {"msgtype": "m.text", "body": "test msg"}, + }, + ) + + self.assertEqual(500, channel.code, msg=channel.json_body) + self.assertEqual(Codes.UNKNOWN, channel.json_body["errcode"]) + + @override_config( + { + "server_notices": { + "system_mxid_localpart": "notices", + "system_mxid_display_name": "test display name", + "system_mxid_avatar_url": None, + }, + "max_avatar_size": "10M", + } + ) + def test_displayname_is_set_avatar_is_none(self) -> None: + """ + Tests that sending a server notice succeeds + if a display_name is set, avatar_url is `None` and + "check avatar size and mime type" is set.
+ """ + channel = self.make_request( + "POST", + self.url, + access_token=self.admin_user_tok, + content={ + "user_id": self.other_user, + "content": {"msgtype": "m.text", "body": "test msg"}, + }, + ) + self.assertEqual(200, channel.code, msg=channel.json_body) + + # user has one invite + self._check_invite_and_join_status(self.other_user, 1, 0) + def test_server_notice_disabled(self) -> None: """Tests that server returns error if server notice is disabled""" channel = self.make_request( diff --git a/tests/server_notices/test_resource_limits_server_notices.py b/tests/server_notices/test_resource_limits_server_notices.py index e07ae78fc4..bf403045e9 100644 --- a/tests/server_notices/test_resource_limits_server_notices.py +++ b/tests/server_notices/test_resource_limits_server_notices.py @@ -11,16 +11,19 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - from unittest.mock import Mock +from twisted.test.proto_helpers import MemoryReactor + from synapse.api.constants import EventTypes, LimitBlockingTypes, ServerNoticeMsgType from synapse.api.errors import ResourceLimitError from synapse.rest import admin from synapse.rest.client import login, room, sync +from synapse.server import HomeServer from synapse.server_notices.resource_limits_server_notices import ( ResourceLimitsServerNotices, ) +from synapse.util import Clock from tests import unittest from tests.test_utils import make_awaitable @@ -52,7 +55,7 @@ class TestResourceLimitsServerNotices(unittest.HomeserverTestCase): return config - def prepare(self, reactor, clock, hs): + def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None: self.server_notices_sender = self.hs.get_server_notices_sender() # relying on [1] is far from ideal, but the only case where @@ -251,7 +254,7 @@ class TestResourceLimitsServerNoticesWithRealRooms(unittest.HomeserverTestCase): c["admin_contact"] = "mailto:user@test.com" return c - def prepare(self, reactor, clock, hs): + def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None: self.store = self.hs.get_datastores().main self.server_notices_sender = self.hs.get_server_notices_sender() self.server_notices_manager = self.hs.get_server_notices_manager() -- cgit 1.5.1 From 5e7847dc923142bc68834f9b9538ada3fdd887d5 Mon Sep 17 00:00:00 2001 From: Nick Mills-Barrett Date: Tue, 23 Aug 2022 10:49:59 +0100 Subject: Cache user IDs instead of profile objects (#13573) The profile objects are never used and increase cache size significantly. --- changelog.d/13573.misc | 1 + synapse/handlers/sync.py | 4 +- synapse/state/__init__.py | 13 +++--- synapse/storage/databases/main/roommember.py | 67 ++++++++++++---------------- synapse/util/caches/descriptors.py | 26 ++++++++--- 5 files changed, 57 insertions(+), 54 deletions(-) create mode 100644 changelog.d/13573.misc (limited to 'synapse') diff --git a/changelog.d/13573.misc b/changelog.d/13573.misc new file mode 100644 index 0000000000..1ce9c0c081 --- /dev/null +++ b/changelog.d/13573.misc @@ -0,0 +1 @@ +Cache user IDs instead of profiles to reduce cache memory usage. Contributed by Nick @ Beeper (@fizzadar). 
diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index b4d3f3958c..2d95b1fa24 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -2421,10 +2421,10 @@ class SyncHandler: joined_room.room_id, joined_room.event_pos.stream ) ) - users_in_room = await self.state.get_current_users_in_room( + user_ids_in_room = await self.state.get_current_user_ids_in_room( joined_room.room_id, extrems ) - if user_id in users_in_room: + if user_id in user_ids_in_room: joined_room_ids.add(joined_room.room_id) return frozenset(joined_room_ids) diff --git a/synapse/state/__init__.py b/synapse/state/__init__.py index c355e4f98a..3047e1b1ad 100644 --- a/synapse/state/__init__.py +++ b/synapse/state/__init__.py @@ -44,7 +44,6 @@ from synapse.logging.context import ContextResourceUsage from synapse.replication.http.state import ReplicationUpdateCurrentStateRestServlet from synapse.state import v1, v2 from synapse.storage.databases.main.events_worker import EventRedactBehaviour -from synapse.storage.roommember import ProfileInfo from synapse.storage.state import StateFilter from synapse.types import StateMap from synapse.util.async_helpers import Linearizer @@ -210,11 +209,11 @@ class StateHandler: ret = await self.resolve_state_groups_for_events(room_id, event_ids) return await ret.get_state(self._state_storage_controller, state_filter) - async def get_current_users_in_room( + async def get_current_user_ids_in_room( self, room_id: str, latest_event_ids: List[str] - ) -> Dict[str, ProfileInfo]: + ) -> Set[str]: """ - Get the users who are currently in a room. + Get the user IDs who are currently in a room. Note: This is much slower than using the equivalent method `DataStore.get_users_in_room` or `DataStore.get_users_in_room_with_profiles`, @@ -225,15 +224,15 @@ room_id: The ID of the room. latest_event_ids: Precomputed list of latest event IDs. Will be computed if None. Returns: - Dictionary of user IDs to their profileinfo. + Set of user IDs in the room.
""" assert latest_event_ids is not None - logger.debug("calling resolve_state_groups from get_current_users_in_room") + logger.debug("calling resolve_state_groups from get_current_user_ids_in_room") entry = await self.resolve_state_groups_for_events(room_id, latest_event_ids) state = await entry.get_state(self._state_storage_controller, StateFilter.all()) - return await self.store.get_joined_users_from_state(room_id, state, entry) + return await self.store.get_joined_user_ids_from_state(room_id, state, entry) async def get_hosts_in_room_at_events( self, room_id: str, event_ids: Collection[str] diff --git a/synapse/storage/databases/main/roommember.py b/synapse/storage/databases/main/roommember.py index 827c1f1efd..0eb024a809 100644 --- a/synapse/storage/databases/main/roommember.py +++ b/synapse/storage/databases/main/roommember.py @@ -835,9 +835,9 @@ class RoomMemberWorkerStore(EventsWorkerStore): return shared_room_ids or frozenset() - async def get_joined_users_from_state( + async def get_joined_user_ids_from_state( self, room_id: str, state: StateMap[str], state_entry: "_StateCacheEntry" - ) -> Dict[str, ProfileInfo]: + ) -> Set[str]: state_group: Union[object, int] = state_entry.state_group if not state_group: # If state_group is None it means it has yet to be assigned a @@ -848,25 +848,25 @@ class RoomMemberWorkerStore(EventsWorkerStore): assert state_group is not None with Measure(self._clock, "get_joined_users_from_state"): - return await self._get_joined_users_from_context( + return await self._get_joined_user_ids_from_context( room_id, state_group, state, context=state_entry ) @cached(num_args=2, iterable=True, max_entries=100000) - async def _get_joined_users_from_context( + async def _get_joined_user_ids_from_context( self, room_id: str, state_group: Union[object, int], current_state_ids: StateMap[str], event: Optional[EventBase] = None, context: Optional["_StateCacheEntry"] = None, - ) -> Dict[str, ProfileInfo]: + ) -> Set[str]: # We don't use `state_group`, it's there so that we can cache based # on it. However, it's important that it's never None, since two current_states # with a state_group of None are likely to be different. 
assert state_group is not None - users_in_room = {} + users_in_room = set() member_event_ids = [ e_id for key, e_id in current_state_ids.items() @@ -879,11 +879,11 @@ class RoomMemberWorkerStore(EventsWorkerStore): # If we do then we can reuse that result and simply update it with # any membership changes in `delta_ids` if context.prev_group and context.delta_ids: - prev_res = self._get_joined_users_from_context.cache.get_immediate( + prev_res = self._get_joined_user_ids_from_context.cache.get_immediate( (room_id, context.prev_group), None ) - if prev_res and isinstance(prev_res, dict): - users_in_room = dict(prev_res) + if prev_res and isinstance(prev_res, set): + users_in_room = prev_res member_event_ids = [ e_id for key, e_id in context.delta_ids.items() @@ -891,7 +891,7 @@ class RoomMemberWorkerStore(EventsWorkerStore): ] for etype, state_key in context.delta_ids: if etype == EventTypes.Member: - users_in_room.pop(state_key, None) + users_in_room.discard(state_key) # We check if we have any of the member event ids in the event cache # before we ask the DB @@ -908,42 +908,41 @@ class RoomMemberWorkerStore(EventsWorkerStore): ev_entry = event_map.get(event_id) if ev_entry and not ev_entry.event.rejected_reason: if ev_entry.event.membership == Membership.JOIN: - users_in_room[ev_entry.event.state_key] = ProfileInfo( - display_name=ev_entry.event.content.get("displayname", None), - avatar_url=ev_entry.event.content.get("avatar_url", None), - ) + users_in_room.add(ev_entry.event.state_key) else: missing_member_event_ids.append(event_id) if missing_member_event_ids: - event_to_memberships = await self._get_joined_profiles_from_event_ids( + event_to_memberships = await self._get_user_ids_from_membership_event_ids( missing_member_event_ids ) - users_in_room.update(row for row in event_to_memberships.values() if row) + users_in_room.update(event_to_memberships.values()) if event is not None and event.type == EventTypes.Member: if event.membership == Membership.JOIN: if event.event_id in member_event_ids: - users_in_room[event.state_key] = ProfileInfo( - display_name=event.content.get("displayname", None), - avatar_url=event.content.get("avatar_url", None), - ) + users_in_room.add(event.state_key) return users_in_room - @cached(max_entries=10000) - def _get_joined_profile_from_event_id( + @cached( + max_entries=10000, + # This name matches the old function that has been replaced - the cache name + # is kept here to maintain backwards compatibility. + name="_get_joined_profile_from_event_id", + ) + def _get_user_id_from_membership_event_id( self, event_id: str ) -> Optional[Tuple[str, ProfileInfo]]: raise NotImplementedError() @cachedList( - cached_method_name="_get_joined_profile_from_event_id", + cached_method_name="_get_user_id_from_membership_event_id", list_name="event_ids", ) - async def _get_joined_profiles_from_event_ids( + async def _get_user_ids_from_membership_event_ids( self, event_ids: Iterable[str] - ) -> Dict[str, Optional[Tuple[str, ProfileInfo]]]: + ) -> Dict[str, str]: """For given set of member event_ids check if they point to a join event and if so return the associated user and profile info. 
@@ -958,21 +957,13 @@ class RoomMemberWorkerStore(EventsWorkerStore): table="room_memberships", column="event_id", iterable=event_ids, - retcols=("user_id", "display_name", "avatar_url", "event_id"), + retcols=("user_id", "event_id"), keyvalues={"membership": Membership.JOIN}, batch_size=1000, - desc="_get_joined_profiles_from_event_ids", + desc="_get_user_ids_from_membership_event_ids", ) - return { - row["event_id"]: ( - row["user_id"], - ProfileInfo( - avatar_url=row["avatar_url"], display_name=row["display_name"] - ), - ) - for row in rows - } + return {row["event_id"]: row["user_id"] for row in rows} @cached(max_entries=10000) async def is_host_joined(self, room_id: str, host: str) -> bool: @@ -1131,12 +1122,12 @@ class RoomMemberWorkerStore(EventsWorkerStore): else: # The cache doesn't match the state group or prev state group, # so we calculate the result from first principles. - joined_users = await self.get_joined_users_from_state( + joined_user_ids = await self.get_joined_user_ids_from_state( room_id, state, state_entry ) cache.hosts_to_joined_users = {} - for user_id in joined_users: + for user_id in joined_user_ids: host = intern_string(get_domain_from_id(user_id)) cache.hosts_to_joined_users.setdefault(host, set()).add(user_id) diff --git a/synapse/util/caches/descriptors.py b/synapse/util/caches/descriptors.py index 867f315b2a..9d4bc89edb 100644 --- a/synapse/util/caches/descriptors.py +++ b/synapse/util/caches/descriptors.py @@ -73,8 +73,10 @@ class _CacheDescriptorBase: num_args: Optional[int], uncached_args: Optional[Collection[str]] = None, cache_context: bool = False, + name: Optional[str] = None, ): self.orig = orig + self.name = name or orig.__name__ arg_spec = inspect.getfullargspec(orig) all_args = arg_spec.args @@ -211,7 +213,7 @@ class LruCacheDescriptor(_CacheDescriptorBase): def __get__(self, obj: Optional[Any], owner: Optional[Type]) -> Callable[..., Any]: cache: LruCache[CacheKey, Any] = LruCache( - cache_name=self.orig.__name__, + cache_name=self.name, max_size=self.max_entries, ) @@ -241,7 +243,7 @@ class LruCacheDescriptor(_CacheDescriptorBase): wrapped = cast(_CachedFunction, _wrapped) wrapped.cache = cache - obj.__dict__[self.orig.__name__] = wrapped + obj.__dict__[self.name] = wrapped return wrapped @@ -301,12 +303,14 @@ class DeferredCacheDescriptor(_CacheDescriptorBase): cache_context: bool = False, iterable: bool = False, prune_unread_entries: bool = True, + name: Optional[str] = None, ): super().__init__( orig, num_args=num_args, uncached_args=uncached_args, cache_context=cache_context, + name=name, ) if tree and self.num_args < 2: @@ -321,7 +325,7 @@ class DeferredCacheDescriptor(_CacheDescriptorBase): def __get__(self, obj: Optional[Any], owner: Optional[Type]) -> Callable[..., Any]: cache: DeferredCache[CacheKey, Any] = DeferredCache( - name=self.orig.__name__, + name=self.name, max_entries=self.max_entries, tree=self.tree, iterable=self.iterable, @@ -372,7 +376,7 @@ class DeferredCacheDescriptor(_CacheDescriptorBase): wrapped.cache = cache wrapped.num_args = self.num_args - obj.__dict__[self.orig.__name__] = wrapped + obj.__dict__[self.name] = wrapped return wrapped @@ -393,6 +397,7 @@ class DeferredCacheListDescriptor(_CacheDescriptorBase): cached_method_name: str, list_name: str, num_args: Optional[int] = None, + name: Optional[str] = None, ): """ Args: @@ -403,7 +408,7 @@ class DeferredCacheListDescriptor(_CacheDescriptorBase): but including list_name) to use as cache keys. Defaults to all named args of the function. 
""" - super().__init__(orig, num_args=num_args, uncached_args=None) + super().__init__(orig, num_args=num_args, uncached_args=None, name=name) self.list_name = list_name @@ -525,7 +530,7 @@ class DeferredCacheListDescriptor(_CacheDescriptorBase): else: return defer.succeed(results) - obj.__dict__[self.orig.__name__] = wrapped + obj.__dict__[self.name] = wrapped return wrapped @@ -577,6 +582,7 @@ def cached( cache_context: bool = False, iterable: bool = False, prune_unread_entries: bool = True, + name: Optional[str] = None, ) -> Callable[[F], _CachedFunction[F]]: func = lambda orig: DeferredCacheDescriptor( orig, @@ -587,13 +593,18 @@ def cached( cache_context=cache_context, iterable=iterable, prune_unread_entries=prune_unread_entries, + name=name, ) return cast(Callable[[F], _CachedFunction[F]], func) def cachedList( - *, cached_method_name: str, list_name: str, num_args: Optional[int] = None + *, + cached_method_name: str, + list_name: str, + num_args: Optional[int] = None, + name: Optional[str] = None, ) -> Callable[[F], _CachedFunction[F]]: """Creates a descriptor that wraps a function in a `DeferredCacheListDescriptor`. @@ -628,6 +639,7 @@ def cachedList( cached_method_name=cached_method_name, list_name=list_name, num_args=num_args, + name=name, ) return cast(Callable[[F], _CachedFunction[F]], func) -- cgit 1.5.1 From 956e015413d3da417c1058e3e72d97b3d1bc8170 Mon Sep 17 00:00:00 2001 From: David Robertson Date: Tue, 23 Aug 2022 12:40:00 +0100 Subject: Drop support for delegating email validation, round 2 (#13596) --- CHANGES.md | 12 +++ changelog.d/13596.removal | 1 + docs/upgrade.md | 19 ++++ docs/usage/configuration/config_documentation.md | 5 +- synapse/app/homeserver.py | 3 +- synapse/config/emailconfig.py | 46 ++-------- synapse/config/registration.py | 13 +-- synapse/handlers/identity.py | 56 +----------- synapse/handlers/ui_auth/checkers.py | 21 +---- synapse/rest/client/account.py | 108 ++++++++--------------- synapse/rest/client/register.py | 59 +++++-------- synapse/rest/synapse/client/password_reset.py | 8 +- tests/rest/client/test_register.py | 2 +- 13 files changed, 108 insertions(+), 245 deletions(-) create mode 100644 changelog.d/13596.removal (limited to 'synapse') diff --git a/CHANGES.md b/CHANGES.md index 778713f528..14fafc260d 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,6 +1,12 @@ Synapse 1.66.0rc1 (2022-08-23) ============================== +This release removes the ability for homeservers to delegate email ownership +verification and password reset confirmation to identity servers. This removal +was originally planned for Synapse 1.64, but was later deferred until now. + +See the [upgrade notes](https://matrix-org.github.io/synapse/v1.66/upgrade.html#upgrading-to-v1660) for more details. + Features -------- @@ -33,6 +39,12 @@ Improved Documentation - Fix the doc and some warnings that were referring to the nonexistent `custom_templates_directory` setting (instead of `custom_template_directory`). ([\#13538](https://github.com/matrix-org/synapse/issues/13538)) +Deprecations and Removals +------------------------- + +- Remove the ability for homeservers to delegate email ownership verification + and password reset confirmation to identity servers. See [upgrade notes](https://matrix-org.github.io/synapse/v1.66/upgrade.html#upgrading-to-v1660) for more details. 
+ Internal Changes ---------------- diff --git a/changelog.d/13596.removal b/changelog.d/13596.removal new file mode 100644 index 0000000000..6c12ae75b4 --- /dev/null +++ b/changelog.d/13596.removal @@ -0,0 +1 @@ +Remove the ability for homeservers to delegate email ownership verification and password reset confirmation to identity servers. See [upgrade notes](https://github.com/matrix-org/synapse/blob/release-v1.66/docs/upgrade.md#upgrading-to-v1660) for more details. \ No newline at end of file diff --git a/docs/upgrade.md b/docs/upgrade.md index 47a74b67de..0ab5bfeaf0 100644 --- a/docs/upgrade.md +++ b/docs/upgrade.md @@ -89,6 +89,25 @@ process, for example: dpkg -i matrix-synapse-py3_1.3.0+stretch1_amd64.deb ``` +# Upgrading to v1.66.0 + +## Delegation of email validation no longer supported + +As of this version, Synapse no longer allows the tasks of verifying email address +ownership, and password reset confirmation, to be delegated to an identity server. +This removal was previously planned for Synapse 1.64.0, but was +[delayed](https://github.com/matrix-org/synapse/issues/13421) until now to give +homeserver administrators more notice of the change. + +To continue to allow users to add email addresses to their homeserver accounts, +and perform password resets, make sure that Synapse is configured with a working +email server in the [`email` configuration +section](https://matrix-org.github.io/synapse/latest/usage/configuration/config_documentation.html#email) +(including, at a minimum, a `notif_from` setting.) + +Specifying an `email` setting under `account_threepid_delegates` will now cause +an error at startup. + # Upgrading to v1.64.0 ## Deprecation of the ability to delegate e-mail verification to identity servers diff --git a/docs/usage/configuration/config_documentation.md b/docs/usage/configuration/config_documentation.md index cc72966823..8ae018e628 100644 --- a/docs/usage/configuration/config_documentation.md +++ b/docs/usage/configuration/config_documentation.md @@ -2182,7 +2182,10 @@ their account. by the Matrix Identity Service API [specification](https://matrix.org/docs/spec/identity_service/latest).) -*Updated in Synapse 1.64.0*: The `email` option is deprecated. +*Deprecated in Synapse 1.64.0*: The `email` option is deprecated. + +*Removed in Synapse 1.66.0*: The `email` option has been removed. +If present, Synapse will report a configuration error on startup. 
Example configuration: ```yaml diff --git a/synapse/app/homeserver.py b/synapse/app/homeserver.py index d98012adeb..68993d91a9 100644 --- a/synapse/app/homeserver.py +++ b/synapse/app/homeserver.py @@ -44,7 +44,6 @@ from synapse.app._base import ( register_start, ) from synapse.config._base import ConfigError, format_config_error -from synapse.config.emailconfig import ThreepidBehaviour from synapse.config.homeserver import HomeServerConfig from synapse.config.server import ListenerConfig from synapse.federation.transport.server import TransportLayerServer @@ -202,7 +201,7 @@ class SynapseHomeServer(HomeServer): } ) - if self.config.email.threepid_behaviour_email == ThreepidBehaviour.LOCAL: + if self.config.email.can_verify_email: from synapse.rest.synapse.client.password_reset import ( PasswordResetSubmitTokenResource, ) diff --git a/synapse/config/emailconfig.py b/synapse/config/emailconfig.py index 66a6dbf1fe..a3af35b7c4 100644 --- a/synapse/config/emailconfig.py +++ b/synapse/config/emailconfig.py @@ -18,7 +18,6 @@ import email.utils import logging import os -from enum import Enum from typing import Any import attr @@ -136,40 +135,22 @@ class EmailConfig(Config): self.email_enable_notifs = email_config.get("enable_notifs", False) - self.threepid_behaviour_email = ( - # Have Synapse handle the email sending if account_threepid_delegates.email - # is not defined - # msisdn is currently always remote while Synapse does not support any method of - # sending SMS messages - ThreepidBehaviour.REMOTE - if self.root.registration.account_threepid_delegate_email - else ThreepidBehaviour.LOCAL - ) - if config.get("trust_identity_server_for_password_resets"): raise ConfigError( - 'The config option "trust_identity_server_for_password_resets" has been removed.' - "Please consult the configuration manual at docs/usage/configuration/config_documentation.md for " - "details and update your config file." + 'The config option "trust_identity_server_for_password_resets" ' + "is no longer supported. Please remove it from the config file." ) - self.local_threepid_handling_disabled_due_to_email_config = False - if ( - self.threepid_behaviour_email == ThreepidBehaviour.LOCAL - and email_config == {} - ): - # We cannot warn the user this has happened here - # Instead do so when a user attempts to reset their password - self.local_threepid_handling_disabled_due_to_email_config = True - - self.threepid_behaviour_email = ThreepidBehaviour.OFF + # If we have email config settings, assume that we can verify ownership of + # email addresses. 
+ self.can_verify_email = email_config != {} # Get lifetime of a validation token in milliseconds self.email_validation_token_lifetime = self.parse_duration( email_config.get("validation_token_lifetime", "1h") ) - if self.threepid_behaviour_email == ThreepidBehaviour.LOCAL: + if self.can_verify_email: missing = [] if not self.email_notif_from: missing.append("email.notif_from") @@ -360,18 +341,3 @@ class EmailConfig(Config): "Config option email.invite_client_location must be a http or https URL", path=("email", "invite_client_location"), ) - - -class ThreepidBehaviour(Enum): - """ - Enum to define the behaviour of Synapse with regards to when it contacts an identity - server for 3pid registration and password resets - - REMOTE = use an external server to send tokens - LOCAL = send tokens ourselves - OFF = disable registration via 3pid and password resets - """ - - REMOTE = "remote" - LOCAL = "local" - OFF = "off" diff --git a/synapse/config/registration.py b/synapse/config/registration.py index 01fb0331bc..a888d976f2 100644 --- a/synapse/config/registration.py +++ b/synapse/config/registration.py @@ -13,7 +13,6 @@ # See the License for the specific language governing permissions and # limitations under the License. import argparse -import logging from typing import Any, Optional from synapse.api.constants import RoomCreationPreset @@ -21,15 +20,11 @@ from synapse.config._base import Config, ConfigError from synapse.types import JsonDict, RoomAlias, UserID from synapse.util.stringutils import random_string_with_symbols, strtobool -logger = logging.getLogger(__name__) - -LEGACY_EMAIL_DELEGATE_WARNING = """\ -Delegation of email verification to an identity server is now deprecated. To +NO_EMAIL_DELEGATE_ERROR = """\ +Delegation of email verification to an identity server is no longer supported. To continue to allow users to add email addresses to their accounts, and use them for password resets, configure Synapse with an SMTP server via the `email` setting, and remove `account_threepid_delegates.email`. - -This will be an error in a future version. """ @@ -64,9 +59,7 @@ class RegistrationConfig(Config): account_threepid_delegates = config.get("account_threepid_delegates") or {} if "email" in account_threepid_delegates: - logger.warning(LEGACY_EMAIL_DELEGATE_WARNING) - - self.account_threepid_delegate_email = account_threepid_delegates.get("email") + raise ConfigError(NO_EMAIL_DELEGATE_ERROR) self.account_threepid_delegate_msisdn = account_threepid_delegates.get("msisdn") self.default_identity_server = config.get("default_identity_server") self.allow_guest_access = config.get("allow_guest_access", False) diff --git a/synapse/handlers/identity.py b/synapse/handlers/identity.py index e5afe84df9..9571d461c8 100644 --- a/synapse/handlers/identity.py +++ b/synapse/handlers/identity.py @@ -26,7 +26,6 @@ from synapse.api.errors import ( SynapseError, ) from synapse.api.ratelimiting import Ratelimiter -from synapse.config.emailconfig import ThreepidBehaviour from synapse.http import RequestTimedOutError from synapse.http.client import SimpleHttpClient from synapse.http.site import SynapseRequest @@ -416,48 +415,6 @@ class IdentityHandler: return session_id - async def request_email_token( - self, - id_server: str, - email: str, - client_secret: str, - send_attempt: int, - next_link: Optional[str] = None, - ) -> JsonDict: - """ - Request an external server send an email on our behalf for the purposes of threepid - validation. 
- - Args: - id_server: The identity server to proxy to - email: The email to send the message to - client_secret: The unique client_secret sends by the user - send_attempt: Which attempt this is - next_link: A link to redirect the user to once they submit the token - - Returns: - The json response body from the server - """ - params = { - "email": email, - "client_secret": client_secret, - "send_attempt": send_attempt, - } - if next_link: - params["next_link"] = next_link - - try: - data = await self.http_client.post_json_get_json( - id_server + "/_matrix/identity/api/v1/validate/email/requestToken", - params, - ) - return data - except HttpResponseException as e: - logger.info("Proxied requestToken failed: %r", e) - raise e.to_synapse_error() - except RequestTimedOutError: - raise SynapseError(500, "Timed out contacting identity server") - async def requestMsisdnToken( self, id_server: str, @@ -531,18 +488,7 @@ class IdentityHandler: validation_session = None # Try to validate as email - if self.hs.config.email.threepid_behaviour_email == ThreepidBehaviour.REMOTE: - # Remote emails will only be used if a valid identity server is provided. - assert ( - self.hs.config.registration.account_threepid_delegate_email is not None - ) - - # Ask our delegated email identity server - validation_session = await self.threepid_from_creds( - self.hs.config.registration.account_threepid_delegate_email, - threepid_creds, - ) - elif self.hs.config.email.threepid_behaviour_email == ThreepidBehaviour.LOCAL: + if self.hs.config.email.can_verify_email: # Get a validated session matching these details validation_session = await self.store.get_threepid_validation_session( "email", client_secret, sid=sid, validated=True diff --git a/synapse/handlers/ui_auth/checkers.py b/synapse/handlers/ui_auth/checkers.py index 05cebb5d4d..a744d68c64 100644 --- a/synapse/handlers/ui_auth/checkers.py +++ b/synapse/handlers/ui_auth/checkers.py @@ -19,7 +19,6 @@ from twisted.web.client import PartialDownloadError from synapse.api.constants import LoginType from synapse.api.errors import Codes, LoginError, SynapseError -from synapse.config.emailconfig import ThreepidBehaviour from synapse.util import json_decoder if TYPE_CHECKING: @@ -153,7 +152,7 @@ class _BaseThreepidAuthChecker: logger.info("Getting validated threepid. 
threepidcreds: %r", (threepid_creds,)) - # msisdns are currently always ThreepidBehaviour.REMOTE + # msisdns are currently always verified via the IS if medium == "msisdn": if not self.hs.config.registration.account_threepid_delegate_msisdn: raise SynapseError( @@ -164,18 +163,7 @@ class _BaseThreepidAuthChecker: threepid_creds, ) elif medium == "email": - if ( - self.hs.config.email.threepid_behaviour_email - == ThreepidBehaviour.REMOTE - ): - assert self.hs.config.registration.account_threepid_delegate_email - threepid = await identity_handler.threepid_from_creds( - self.hs.config.registration.account_threepid_delegate_email, - threepid_creds, - ) - elif ( - self.hs.config.email.threepid_behaviour_email == ThreepidBehaviour.LOCAL - ): + if self.hs.config.email.can_verify_email: threepid = None row = await self.store.get_threepid_validation_session( medium, @@ -227,10 +215,7 @@ class EmailIdentityAuthChecker(UserInteractiveAuthChecker, _BaseThreepidAuthChec _BaseThreepidAuthChecker.__init__(self, hs) def is_enabled(self) -> bool: - return self.hs.config.email.threepid_behaviour_email in ( - ThreepidBehaviour.REMOTE, - ThreepidBehaviour.LOCAL, - ) + return self.hs.config.email.can_verify_email async def check_auth(self, authdict: dict, clientip: str) -> Any: return await self._check_threepid("email", authdict) diff --git a/synapse/rest/client/account.py b/synapse/rest/client/account.py index 9041e29d6c..1f9a8ccc23 100644 --- a/synapse/rest/client/account.py +++ b/synapse/rest/client/account.py @@ -29,7 +29,6 @@ from synapse.api.errors import ( SynapseError, ThreepidValidationError, ) -from synapse.config.emailconfig import ThreepidBehaviour from synapse.handlers.ui_auth import UIAuthSessionDataConstants from synapse.http.server import HttpServer, finish_request, respond_with_html from synapse.http.servlet import ( @@ -68,7 +67,7 @@ class EmailPasswordRequestTokenRestServlet(RestServlet): self.config = hs.config self.identity_handler = hs.get_identity_handler() - if self.config.email.threepid_behaviour_email == ThreepidBehaviour.LOCAL: + if self.config.email.can_verify_email: self.mailer = Mailer( hs=self.hs, app_name=self.config.email.email_app_name, @@ -77,11 +76,10 @@ class EmailPasswordRequestTokenRestServlet(RestServlet): ) async def on_POST(self, request: SynapseRequest) -> Tuple[int, JsonDict]: - if self.config.email.threepid_behaviour_email == ThreepidBehaviour.OFF: - if self.config.email.local_threepid_handling_disabled_due_to_email_config: - logger.warning( - "User password resets have been disabled due to lack of email config" - ) + if not self.config.email.can_verify_email: + logger.warning( + "User password resets have been disabled due to lack of email config" + ) raise SynapseError( 400, "Email-based password resets have been disabled on this server" ) @@ -117,35 +115,20 @@ class EmailPasswordRequestTokenRestServlet(RestServlet): raise SynapseError(400, "Email not found", Codes.THREEPID_NOT_FOUND) - if self.config.email.threepid_behaviour_email == ThreepidBehaviour.REMOTE: - assert self.hs.config.registration.account_threepid_delegate_email - - # Have the configured identity server handle the request - ret = await self.identity_handler.request_email_token( - self.hs.config.registration.account_threepid_delegate_email, - body.email, - body.client_secret, - body.send_attempt, - body.next_link, - ) - else: - # Send password reset emails from Synapse - sid = await self.identity_handler.send_threepid_validation( - body.email, - body.client_secret, - body.send_attempt, - 
self.mailer.send_password_reset_mail, - body.next_link, - ) - - # Wrap the session id in a JSON object - ret = {"sid": sid} - + # Send password reset emails from Synapse + sid = await self.identity_handler.send_threepid_validation( + body.email, + body.client_secret, + body.send_attempt, + self.mailer.send_password_reset_mail, + body.next_link, + ) threepid_send_requests.labels(type="email", reason="password_reset").observe( body.send_attempt ) - return 200, ret + # Wrap the session id in a JSON object + return 200, {"sid": sid} class PasswordRestServlet(RestServlet): @@ -340,7 +323,7 @@ class EmailThreepidRequestTokenRestServlet(RestServlet): self.identity_handler = hs.get_identity_handler() self.store = self.hs.get_datastores().main - if self.config.email.threepid_behaviour_email == ThreepidBehaviour.LOCAL: + if self.config.email.can_verify_email: self.mailer = Mailer( hs=self.hs, app_name=self.config.email.email_app_name, @@ -349,11 +332,10 @@ class EmailThreepidRequestTokenRestServlet(RestServlet): ) async def on_POST(self, request: SynapseRequest) -> Tuple[int, JsonDict]: - if self.config.email.threepid_behaviour_email == ThreepidBehaviour.OFF: - if self.config.email.local_threepid_handling_disabled_due_to_email_config: - logger.warning( - "Adding emails have been disabled due to lack of an email config" - ) + if not self.config.email.can_verify_email: + logger.warning( + "Adding emails have been disabled due to lack of an email config" + ) raise SynapseError( 400, "Adding an email to your account is disabled on this server", @@ -391,35 +373,21 @@ class EmailThreepidRequestTokenRestServlet(RestServlet): raise SynapseError(400, "Email is already in use", Codes.THREEPID_IN_USE) - if self.config.email.threepid_behaviour_email == ThreepidBehaviour.REMOTE: - assert self.hs.config.registration.account_threepid_delegate_email - - # Have the configured identity server handle the request - ret = await self.identity_handler.request_email_token( - self.hs.config.registration.account_threepid_delegate_email, - body.email, - body.client_secret, - body.send_attempt, - body.next_link, - ) - else: - # Send threepid validation emails from Synapse - sid = await self.identity_handler.send_threepid_validation( - body.email, - body.client_secret, - body.send_attempt, - self.mailer.send_add_threepid_mail, - body.next_link, - ) - - # Wrap the session id in a JSON object - ret = {"sid": sid} + # Send threepid validation emails from Synapse + sid = await self.identity_handler.send_threepid_validation( + body.email, + body.client_secret, + body.send_attempt, + self.mailer.send_add_threepid_mail, + body.next_link, + ) threepid_send_requests.labels(type="email", reason="add_threepid").observe( body.send_attempt ) - return 200, ret + # Wrap the session id in a JSON object + return 200, {"sid": sid} class MsisdnThreepidRequestTokenRestServlet(RestServlet): @@ -512,24 +480,18 @@ class AddThreepidEmailSubmitTokenServlet(RestServlet): self.config = hs.config self.clock = hs.get_clock() self.store = hs.get_datastores().main - if self.config.email.threepid_behaviour_email == ThreepidBehaviour.LOCAL: + if self.config.email.can_verify_email: self._failure_email_template = ( self.config.email.email_add_threepid_template_failure_html ) async def on_GET(self, request: Request) -> None: - if self.config.email.threepid_behaviour_email == ThreepidBehaviour.OFF: - if self.config.email.local_threepid_handling_disabled_due_to_email_config: - logger.warning( - "Adding emails have been disabled due to lack of an email config" - ) - 
raise SynapseError( - 400, "Adding an email to your account is disabled on this server" + if not self.config.email.can_verify_email: + logger.warning( + "Adding emails have been disabled due to lack of an email config" ) - elif self.config.email.threepid_behaviour_email == ThreepidBehaviour.REMOTE: raise SynapseError( - 400, - "This homeserver is not validating threepids.", + 400, "Adding an email to your account is disabled on this server" ) sid = parse_string(request, "sid", required=True) diff --git a/synapse/rest/client/register.py b/synapse/rest/client/register.py index 1b953d3fa0..20bab20c8f 100644 --- a/synapse/rest/client/register.py +++ b/synapse/rest/client/register.py @@ -31,7 +31,6 @@ from synapse.api.errors import ( ) from synapse.api.ratelimiting import Ratelimiter from synapse.config import ConfigError -from synapse.config.emailconfig import ThreepidBehaviour from synapse.config.homeserver import HomeServerConfig from synapse.config.ratelimiting import FederationRatelimitSettings from synapse.config.server import is_threepid_reserved @@ -74,7 +73,7 @@ class EmailRegisterRequestTokenRestServlet(RestServlet): self.identity_handler = hs.get_identity_handler() self.config = hs.config - if self.hs.config.email.threepid_behaviour_email == ThreepidBehaviour.LOCAL: + if self.hs.config.email.can_verify_email: self.mailer = Mailer( hs=self.hs, app_name=self.config.email.email_app_name, @@ -83,13 +82,10 @@ class EmailRegisterRequestTokenRestServlet(RestServlet): ) async def on_POST(self, request: SynapseRequest) -> Tuple[int, JsonDict]: - if self.hs.config.email.threepid_behaviour_email == ThreepidBehaviour.OFF: - if ( - self.hs.config.email.local_threepid_handling_disabled_due_to_email_config - ): - logger.warning( - "Email registration has been disabled due to lack of email config" - ) + if not self.hs.config.email.can_verify_email: + logger.warning( + "Email registration has been disabled due to lack of email config" + ) raise SynapseError( 400, "Email-based registration has been disabled on this server" ) @@ -138,35 +134,21 @@ class EmailRegisterRequestTokenRestServlet(RestServlet): raise SynapseError(400, "Email is already in use", Codes.THREEPID_IN_USE) - if self.config.email.threepid_behaviour_email == ThreepidBehaviour.REMOTE: - assert self.hs.config.registration.account_threepid_delegate_email - - # Have the configured identity server handle the request - ret = await self.identity_handler.request_email_token( - self.hs.config.registration.account_threepid_delegate_email, - email, - client_secret, - send_attempt, - next_link, - ) - else: - # Send registration emails from Synapse, - # wrapping the session id in a JSON object. 
- ret = { - "sid": await self.identity_handler.send_threepid_validation( - email, - client_secret, - send_attempt, - self.mailer.send_registration_mail, - next_link, - ) - } + # Send registration emails from Synapse + sid = await self.identity_handler.send_threepid_validation( + email, + client_secret, + send_attempt, + self.mailer.send_registration_mail, + next_link, + ) threepid_send_requests.labels(type="email", reason="register").observe( send_attempt ) - return 200, ret + # Wrap the session id in a JSON object + return 200, {"sid": sid} class MsisdnRegisterRequestTokenRestServlet(RestServlet): @@ -260,7 +242,7 @@ class RegistrationSubmitTokenServlet(RestServlet): self.clock = hs.get_clock() self.store = hs.get_datastores().main - if self.config.email.threepid_behaviour_email == ThreepidBehaviour.LOCAL: + if self.config.email.can_verify_email: self._failure_email_template = ( self.config.email.email_registration_template_failure_html ) @@ -270,11 +252,10 @@ class RegistrationSubmitTokenServlet(RestServlet): raise SynapseError( 400, "This medium is currently not supported for registration" ) - if self.config.email.threepid_behaviour_email == ThreepidBehaviour.OFF: - if self.config.email.local_threepid_handling_disabled_due_to_email_config: - logger.warning( - "User registration via email has been disabled due to lack of email config" - ) + if not self.config.email.can_verify_email: + logger.warning( + "User registration via email has been disabled due to lack of email config" + ) raise SynapseError( 400, "Email-based registration is disabled on this server" ) diff --git a/synapse/rest/synapse/client/password_reset.py b/synapse/rest/synapse/client/password_reset.py index 6ac9dbc7c9..b9402cfb75 100644 --- a/synapse/rest/synapse/client/password_reset.py +++ b/synapse/rest/synapse/client/password_reset.py @@ -17,7 +17,6 @@ from typing import TYPE_CHECKING, Tuple from twisted.web.server import Request from synapse.api.errors import ThreepidValidationError -from synapse.config.emailconfig import ThreepidBehaviour from synapse.http.server import DirectServeHtmlResource from synapse.http.servlet import parse_string from synapse.util.stringutils import assert_valid_client_secret @@ -46,9 +45,6 @@ class PasswordResetSubmitTokenResource(DirectServeHtmlResource): self.clock = hs.get_clock() self.store = hs.get_datastores().main - self._local_threepid_handling_disabled_due_to_email_config = ( - hs.config.email.local_threepid_handling_disabled_due_to_email_config - ) self._confirmation_email_template = ( hs.config.email.email_password_reset_template_confirmation_html ) @@ -59,8 +55,8 @@ class PasswordResetSubmitTokenResource(DirectServeHtmlResource): hs.config.email.email_password_reset_template_failure_html ) - # This resource should not be mounted if threepid behaviour is not LOCAL - assert hs.config.email.threepid_behaviour_email == ThreepidBehaviour.LOCAL + # This resource should only be mounted if email validation is enabled + assert hs.config.email.can_verify_email async def _async_render_GET(self, request: Request) -> Tuple[int, bytes]: sid = parse_string(request, "sid", required=True) diff --git a/tests/rest/client/test_register.py b/tests/rest/client/test_register.py index ab4277dd31..b781875d52 100644 --- a/tests/rest/client/test_register.py +++ b/tests/rest/client/test_register.py @@ -586,9 +586,9 @@ class RegisterRestServletTestCase(unittest.HomeserverTestCase): "require_at_registration": True, }, "account_threepid_delegates": { - "email": "https://id_server", "msisdn": 
"https://id_server", }, + "email": {"notif_from": "Synapse "}, } ) def test_advertised_flows_captcha_and_terms_and_3pids(self) -> None: -- cgit 1.5.1 From aec87a0f9369a3015b2a53469f88d1de274e8b71 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 23 Aug 2022 13:15:43 +0100 Subject: Speed up fetching large numbers of push rules (#13592) --- changelog.d/13592.misc | 1 + synapse/replication/slave/storage/push_rule.py | 1 - synapse/storage/databases/main/account_data.py | 3 --- synapse/storage/databases/main/push_rule.py | 6 +----- 4 files changed, 2 insertions(+), 9 deletions(-) create mode 100644 changelog.d/13592.misc (limited to 'synapse') diff --git a/changelog.d/13592.misc b/changelog.d/13592.misc new file mode 100644 index 0000000000..8f48d557e5 --- /dev/null +++ b/changelog.d/13592.misc @@ -0,0 +1 @@ +Minor speed up of fetching large numbers of push rules. diff --git a/synapse/replication/slave/storage/push_rule.py b/synapse/replication/slave/storage/push_rule.py index 52ee3f7e58..5e65eaf1e0 100644 --- a/synapse/replication/slave/storage/push_rule.py +++ b/synapse/replication/slave/storage/push_rule.py @@ -31,6 +31,5 @@ class SlavedPushRuleStore(SlavedEventStore, PushRulesWorkerStore): self._push_rules_stream_id_gen.advance(instance_name, token) for row in rows: self.get_push_rules_for_user.invalidate((row.user_id,)) - self.get_push_rules_enabled_for_user.invalidate((row.user_id,)) self.push_rules_stream_cache.entity_has_changed(row.user_id, token) return super().process_replication_rows(stream_name, instance_name, token, rows) diff --git a/synapse/storage/databases/main/account_data.py b/synapse/storage/databases/main/account_data.py index 9af9f4f18e..c38b8a9e5a 100644 --- a/synapse/storage/databases/main/account_data.py +++ b/synapse/storage/databases/main/account_data.py @@ -650,9 +650,6 @@ class AccountDataWorkerStore(PushRulesWorkerStore, CacheInvalidationWorkerStore) txn, self.get_account_data_for_room, (user_id,) ) self._invalidate_cache_and_stream(txn, self.get_push_rules_for_user, (user_id,)) - self._invalidate_cache_and_stream( - txn, self.get_push_rules_enabled_for_user, (user_id,) - ) # This user might be contained in the ignored_by cache for other users, # so we have to invalidate it all. 
self._invalidate_all_cache_and_stream(txn, self.ignored_by) diff --git a/synapse/storage/databases/main/push_rule.py b/synapse/storage/databases/main/push_rule.py index 255620f996..5079edd1e0 100644 --- a/synapse/storage/databases/main/push_rule.py +++ b/synapse/storage/databases/main/push_rule.py @@ -165,7 +165,6 @@ class PushRulesWorkerStore( return _load_rules(rows, enabled_map, self.hs.config.experimental) - @cached(max_entries=5000) async def get_push_rules_enabled_for_user(self, user_id: str) -> Dict[str, bool]: results = await self.db_pool.simple_select_list( table="push_rules_enable", @@ -229,9 +228,6 @@ class PushRulesWorkerStore( return results - @cachedList( - cached_method_name="get_push_rules_enabled_for_user", list_name="user_ids" - ) async def bulk_get_push_rules_enabled( self, user_ids: Collection[str] ) -> Dict[str, Dict[str, bool]]: @@ -246,6 +242,7 @@ class PushRulesWorkerStore( iterable=user_ids, retcols=("user_name", "rule_id", "enabled"), desc="bulk_get_push_rules_enabled", + batch_size=1000, ) for row in rows: enabled = bool(row["enabled"]) @@ -792,7 +789,6 @@ class PushRuleStore(PushRulesWorkerStore): self.db_pool.simple_insert_txn(txn, "push_rules_stream", values=values) txn.call_after(self.get_push_rules_for_user.invalidate, (user_id,)) - txn.call_after(self.get_push_rules_enabled_for_user.invalidate, (user_id,)) txn.call_after( self.push_rules_stream_cache.entity_has_changed, user_id, stream_id ) -- cgit 1.5.1 From 05c9c7363b09d2517a79915b831ce423c7defc7e Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 23 Aug 2022 15:14:05 +0100 Subject: Fix regression caused by #13573 (#13600) Broke in #13573. --- changelog.d/13600.misc | 1 + synapse/storage/databases/main/roommember.py | 10 ++++++---- 2 files changed, 7 insertions(+), 4 deletions(-) create mode 100644 changelog.d/13600.misc (limited to 'synapse') diff --git a/changelog.d/13600.misc b/changelog.d/13600.misc new file mode 100644 index 0000000000..1ce9c0c081 --- /dev/null +++ b/changelog.d/13600.misc @@ -0,0 +1 @@ +Cache user IDs instead of profiles to reduce cache memory usage. Contributed by Nick @ Beeper (@fizzadar). diff --git a/synapse/storage/databases/main/roommember.py b/synapse/storage/databases/main/roommember.py index 0eb024a809..046ad3a11c 100644 --- a/synapse/storage/databases/main/roommember.py +++ b/synapse/storage/databases/main/roommember.py @@ -916,7 +916,9 @@ class RoomMemberWorkerStore(EventsWorkerStore): event_to_memberships = await self._get_user_ids_from_membership_event_ids( missing_member_event_ids ) - users_in_room.update(event_to_memberships.values()) + users_in_room.update( + user_id for user_id in event_to_memberships.values() if user_id + ) if event is not None and event.type == EventTypes.Member: if event.membership == Membership.JOIN: @@ -942,15 +944,15 @@ class RoomMemberWorkerStore(EventsWorkerStore): ) async def _get_user_ids_from_membership_event_ids( self, event_ids: Iterable[str] - ) -> Dict[str, str]: + ) -> Dict[str, Optional[str]]: """For given set of member event_ids check if they point to a join - event and if so return the associated user and profile info. + event. Args: event_ids: The member event IDs to lookup Returns: - Map from event ID to `user_id` and ProfileInfo (or None if not join event). + Map from event ID to `user_id`, or None if event is not a join. 
""" rows = await self.db_pool.simple_select_many_batch( -- cgit 1.5.1 From f7ddfe17a30a50205a23bf5ca4d7d71e691e1e48 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 23 Aug 2022 15:53:27 +0100 Subject: Speed up `@cachedList` (#13591) This speeds things up by ~2x. The vast majority of the time is now spent in `LruCache` moving things around the linked lists. We do this via two things: 1. Don't create a deferred per-key during bulk set operations in `DeferredCache`. Instead, only create them if a subsequent caller asks for the key. 2. Add a bulk lookup API to `DeferredCache` rather than use a loop. --- changelog.d/13591.misc | 1 + synapse/util/caches/deferred_cache.py | 346 +++++++++++++++++++++++++--------- synapse/util/caches/descriptors.py | 89 ++++----- synapse/util/caches/treecache.py | 3 + 4 files changed, 298 insertions(+), 141 deletions(-) create mode 100644 changelog.d/13591.misc (limited to 'synapse') diff --git a/changelog.d/13591.misc b/changelog.d/13591.misc new file mode 100644 index 0000000000..080e865e55 --- /dev/null +++ b/changelog.d/13591.misc @@ -0,0 +1 @@ +Improve performance of `@cachedList`. diff --git a/synapse/util/caches/deferred_cache.py b/synapse/util/caches/deferred_cache.py index 1d6ec22191..6425f851ea 100644 --- a/synapse/util/caches/deferred_cache.py +++ b/synapse/util/caches/deferred_cache.py @@ -14,15 +14,19 @@ # See the License for the specific language governing permissions and # limitations under the License. +import abc import enum import threading from typing import ( Callable, + Collection, + Dict, Generic, - Iterable, MutableMapping, Optional, + Set, Sized, + Tuple, TypeVar, Union, cast, @@ -31,7 +35,6 @@ from typing import ( from prometheus_client import Gauge from twisted.internet import defer -from twisted.python import failure from twisted.python.failure import Failure from synapse.util.async_helpers import ObservableDeferred @@ -94,7 +97,7 @@ class DeferredCache(Generic[KT, VT]): # _pending_deferred_cache maps from the key value to a `CacheEntry` object. self._pending_deferred_cache: Union[ - TreeCache, "MutableMapping[KT, CacheEntry]" + TreeCache, "MutableMapping[KT, CacheEntry[KT, VT]]" ] = cache_type() def metrics_cb() -> None: @@ -159,15 +162,16 @@ class DeferredCache(Generic[KT, VT]): Raises: KeyError if the key is not found in the cache """ - callbacks = [callback] if callback else [] val = self._pending_deferred_cache.get(key, _Sentinel.sentinel) if val is not _Sentinel.sentinel: - val.callbacks.update(callbacks) + val.add_invalidation_callback(key, callback) if update_metrics: m = self.cache.metrics assert m # we always have a name, so should always have metrics m.inc_hits() - return val.deferred.observe() + return val.deferred(key) + + callbacks = (callback,) if callback else () val2 = self.cache.get( key, _Sentinel.sentinel, callbacks=callbacks, update_metrics=update_metrics @@ -177,6 +181,73 @@ class DeferredCache(Generic[KT, VT]): else: return defer.succeed(val2) + def get_bulk( + self, + keys: Collection[KT], + callback: Optional[Callable[[], None]] = None, + ) -> Tuple[Dict[KT, VT], Optional["defer.Deferred[Dict[KT, VT]]"], Collection[KT]]: + """Bulk lookup of items in the cache. + + Returns: + A 3-tuple of: + 1. a dict of key/value of items already cached; + 2. a deferred that resolves to a dict of key/value of items + we're already fetching; and + 3. a collection of keys that don't appear in the previous two. 
+ """ + + # The cached results + cached = {} + + # List of pending deferreds + pending = [] + + # Dict that gets filled out when the pending deferreds complete + pending_results = {} + + # List of keys that aren't in either cache + missing = [] + + callbacks = (callback,) if callback else () + + for key in keys: + # Check if its in the main cache. + immediate_value = self.cache.get( + key, + _Sentinel.sentinel, + callbacks=callbacks, + ) + if immediate_value is not _Sentinel.sentinel: + cached[key] = immediate_value + continue + + # Check if its in the pending cache + pending_value = self._pending_deferred_cache.get(key, _Sentinel.sentinel) + if pending_value is not _Sentinel.sentinel: + pending_value.add_invalidation_callback(key, callback) + + def completed_cb(value: VT, key: KT) -> VT: + pending_results[key] = value + return value + + # Add a callback to fill out `pending_results` when that completes + d = pending_value.deferred(key).addCallback(completed_cb, key) + pending.append(d) + continue + + # Not in either cache + missing.append(key) + + # If we've got pending deferreds, squash them into a single one that + # returns `pending_results`. + pending_deferred = None + if pending: + pending_deferred = defer.gatherResults( + pending, consumeErrors=True + ).addCallback(lambda _: pending_results) + + return (cached, pending_deferred, missing) + def get_immediate( self, key: KT, default: T, update_metrics: bool = True ) -> Union[VT, T]: @@ -218,84 +289,89 @@ class DeferredCache(Generic[KT, VT]): value: a deferred which will complete with a result to add to the cache callback: An optional callback to be called when the entry is invalidated """ - if not isinstance(value, defer.Deferred): - raise TypeError("not a Deferred") - - callbacks = [callback] if callback else [] self.check_thread() - existing_entry = self._pending_deferred_cache.pop(key, None) - if existing_entry: - existing_entry.invalidate() + self._pending_deferred_cache.pop(key, None) # XXX: why don't we invalidate the entry in `self.cache` yet? - # we can save a whole load of effort if the deferred is ready. - if value.called: - result = value.result - if not isinstance(result, failure.Failure): - self.cache.set(key, cast(VT, result), callbacks) - return value - # otherwise, we'll add an entry to the _pending_deferred_cache for now, # and add callbacks to add it to the cache properly later. + entry = CacheEntrySingle[KT, VT](value) + entry.add_invalidation_callback(key, callback) + self._pending_deferred_cache[key] = entry + deferred = entry.deferred(key).addCallbacks( + self._completed_callback, + self._error_callback, + callbackArgs=(entry, key), + errbackArgs=(entry, key), + ) - observable = ObservableDeferred(value, consumeErrors=True) - observer = observable.observe() - entry = CacheEntry(deferred=observable, callbacks=callbacks) + # we return a new Deferred which will be called before any subsequent observers. + return deferred - self._pending_deferred_cache[key] = entry + def start_bulk_input( + self, + keys: Collection[KT], + callback: Optional[Callable[[], None]] = None, + ) -> "CacheMultipleEntries[KT, VT]": + """Bulk set API for use when fetching multiple keys at once from the DB. - def compare_and_pop() -> bool: - """Check if our entry is still the one in _pending_deferred_cache, and - if so, pop it. - - Returns true if the entries matched. 
- """ - existing_entry = self._pending_deferred_cache.pop(key, None) - if existing_entry is entry: - return True - - # oops, the _pending_deferred_cache has been updated since - # we started our query, so we are out of date. - # - # Better put back whatever we took out. (We do it this way - # round, rather than peeking into the _pending_deferred_cache - # and then removing on a match, to make the common case faster) - if existing_entry is not None: - self._pending_deferred_cache[key] = existing_entry - - return False - - def cb(result: VT) -> None: - if compare_and_pop(): - self.cache.set(key, result, entry.callbacks) - else: - # we're not going to put this entry into the cache, so need - # to make sure that the invalidation callbacks are called. - # That was probably done when _pending_deferred_cache was - # updated, but it's possible that `set` was called without - # `invalidate` being previously called, in which case it may - # not have been. Either way, let's double-check now. - entry.invalidate() - - def eb(_fail: Failure) -> None: - compare_and_pop() - entry.invalidate() - - # once the deferred completes, we can move the entry from the - # _pending_deferred_cache to the real cache. - # - observer.addCallbacks(cb, eb) + Called *before* starting the fetch from the DB, and the caller *must* + call either `complete_bulk(..)` or `error_bulk(..)` on the return value. + """ - # we return a new Deferred which will be called before any subsequent observers. - return observable.observe() + entry = CacheMultipleEntries[KT, VT]() + entry.add_global_invalidation_callback(callback) + + for key in keys: + self._pending_deferred_cache[key] = entry + + return entry + + def _completed_callback( + self, value: VT, entry: "CacheEntry[KT, VT]", key: KT + ) -> VT: + """Called when a deferred is completed.""" + # We check if the current entry matches the entry associated with the + # deferred. If they don't match then it got invalidated. + current_entry = self._pending_deferred_cache.pop(key, None) + if current_entry is not entry: + if current_entry: + self._pending_deferred_cache[key] = current_entry + return value + + self.cache.set(key, value, entry.get_invalidation_callbacks(key)) + + return value + + def _error_callback( + self, + failure: Failure, + entry: "CacheEntry[KT, VT]", + key: KT, + ) -> Failure: + """Called when a deferred errors.""" + + # We check if the current entry matches the entry associated with the + # deferred. If they don't match then it got invalidated. 
+ current_entry = self._pending_deferred_cache.pop(key, None) + if current_entry is not entry: + if current_entry: + self._pending_deferred_cache[key] = current_entry + return failure + + for cb in entry.get_invalidation_callbacks(key): + cb() + + return failure def prefill( self, key: KT, value: VT, callback: Optional[Callable[[], None]] = None ) -> None: - callbacks = [callback] if callback else [] + callbacks = (callback,) if callback else () self.cache.set(key, value, callbacks=callbacks) + self._pending_deferred_cache.pop(key, None) def invalidate(self, key: KT) -> None: """Delete a key, or tree of entries @@ -311,41 +387,129 @@ class DeferredCache(Generic[KT, VT]): self.cache.del_multi(key) # if we have a pending lookup for this key, remove it from the - # _pending_deferred_cache, which will (a) stop it being returned - # for future queries and (b) stop it being persisted as a proper entry + # _pending_deferred_cache, which will (a) stop it being returned for + # future queries and (b) stop it being persisted as a proper entry # in self.cache. entry = self._pending_deferred_cache.pop(key, None) - - # run the invalidation callbacks now, rather than waiting for the - # deferred to resolve. if entry: # _pending_deferred_cache.pop should either return a CacheEntry, or, in the # case of a TreeCache, a dict of keys to cache entries. Either way calling # iterate_tree_cache_entry on it will do the right thing. for entry in iterate_tree_cache_entry(entry): - entry.invalidate() + for cb in entry.get_invalidation_callbacks(key): + cb() def invalidate_all(self) -> None: self.check_thread() self.cache.clear() - for entry in self._pending_deferred_cache.values(): - entry.invalidate() + for key, entry in self._pending_deferred_cache.items(): + for cb in entry.get_invalidation_callbacks(key): + cb() + self._pending_deferred_cache.clear() -class CacheEntry: - __slots__ = ["deferred", "callbacks", "invalidated"] +class CacheEntry(Generic[KT, VT], metaclass=abc.ABCMeta): + """Abstract class for entries in `DeferredCache[KT, VT]`""" - def __init__( - self, deferred: ObservableDeferred, callbacks: Iterable[Callable[[], None]] - ): - self.deferred = deferred - self.callbacks = set(callbacks) - self.invalidated = False - - def invalidate(self) -> None: - if not self.invalidated: - self.invalidated = True - for callback in self.callbacks: - callback() - self.callbacks.clear() + @abc.abstractmethod + def deferred(self, key: KT) -> "defer.Deferred[VT]": + """Get a deferred that a caller can wait on to get the value at the + given key""" + ... + + @abc.abstractmethod + def add_invalidation_callback( + self, key: KT, callback: Optional[Callable[[], None]] + ) -> None: + """Add an invalidation callback""" + ... + + @abc.abstractmethod + def get_invalidation_callbacks(self, key: KT) -> Collection[Callable[[], None]]: + """Get all invalidation callbacks""" + ... + + +class CacheEntrySingle(CacheEntry[KT, VT]): + """An implementation of `CacheEntry` wrapping a deferred that results in a + single cache entry. 
+ """ + + __slots__ = ["_deferred", "_callbacks"] + + def __init__(self, deferred: "defer.Deferred[VT]") -> None: + self._deferred = ObservableDeferred(deferred, consumeErrors=True) + self._callbacks: Set[Callable[[], None]] = set() + + def deferred(self, key: KT) -> "defer.Deferred[VT]": + return self._deferred.observe() + + def add_invalidation_callback( + self, key: KT, callback: Optional[Callable[[], None]] + ) -> None: + if callback is None: + return + + self._callbacks.add(callback) + + def get_invalidation_callbacks(self, key: KT) -> Collection[Callable[[], None]]: + return self._callbacks + + +class CacheMultipleEntries(CacheEntry[KT, VT]): + """Cache entry that is used for bulk lookups and insertions.""" + + __slots__ = ["_deferred", "_callbacks", "_global_callbacks"] + + def __init__(self) -> None: + self._deferred: Optional[ObservableDeferred[Dict[KT, VT]]] = None + self._callbacks: Dict[KT, Set[Callable[[], None]]] = {} + self._global_callbacks: Set[Callable[[], None]] = set() + + def deferred(self, key: KT) -> "defer.Deferred[VT]": + if not self._deferred: + self._deferred = ObservableDeferred(defer.Deferred(), consumeErrors=True) + return self._deferred.observe().addCallback(lambda res: res.get(key)) + + def add_invalidation_callback( + self, key: KT, callback: Optional[Callable[[], None]] + ) -> None: + if callback is None: + return + + self._callbacks.setdefault(key, set()).add(callback) + + def get_invalidation_callbacks(self, key: KT) -> Collection[Callable[[], None]]: + return self._callbacks.get(key, set()) | self._global_callbacks + + def add_global_invalidation_callback( + self, callback: Optional[Callable[[], None]] + ) -> None: + """Add a callback for when any keys get invalidated.""" + if callback is None: + return + + self._global_callbacks.add(callback) + + def complete_bulk( + self, + cache: DeferredCache[KT, VT], + result: Dict[KT, VT], + ) -> None: + """Called when there is a result""" + for key, value in result.items(): + cache._completed_callback(value, self, key) + + if self._deferred: + self._deferred.callback(result) + + def error_bulk( + self, cache: DeferredCache[KT, VT], keys: Collection[KT], failure: Failure + ) -> None: + """Called when bulk lookup failed.""" + for key in keys: + cache._error_callback(failure, self, key) + + if self._deferred: + self._deferred.errback(failure) diff --git a/synapse/util/caches/descriptors.py b/synapse/util/caches/descriptors.py index 9d4bc89edb..10aff4d04a 100644 --- a/synapse/util/caches/descriptors.py +++ b/synapse/util/caches/descriptors.py @@ -25,6 +25,7 @@ from typing import ( Generic, Hashable, Iterable, + List, Mapping, Optional, Sequence, @@ -440,16 +441,6 @@ class DeferredCacheListDescriptor(_CacheDescriptorBase): keyargs = [arg_dict[arg_nm] for arg_nm in self.arg_names] list_args = arg_dict[self.list_name] - results = {} - - def update_results_dict(res: Any, arg: Hashable) -> None: - results[arg] = res - - # list of deferreds to wait for - cached_defers = [] - - missing = set() - # If the cache takes a single arg then that is used as the key, # otherwise a tuple is used. 
if num_args == 1: @@ -457,6 +448,9 @@ class DeferredCacheListDescriptor(_CacheDescriptorBase): def arg_to_cache_key(arg: Hashable) -> Hashable: return arg + def cache_key_to_arg(key: tuple) -> Hashable: + return key + else: keylist = list(keyargs) @@ -464,58 +458,53 @@ class DeferredCacheListDescriptor(_CacheDescriptorBase): keylist[self.list_pos] = arg return tuple(keylist) - for arg in list_args: - try: - res = cache.get(arg_to_cache_key(arg), callback=invalidate_callback) - if not res.called: - res.addCallback(update_results_dict, arg) - cached_defers.append(res) - else: - results[arg] = res.result - except KeyError: - missing.add(arg) + def cache_key_to_arg(key: tuple) -> Hashable: + return key[self.list_pos] + + cache_keys = [arg_to_cache_key(arg) for arg in list_args] + immediate_results, pending_deferred, missing = cache.get_bulk( + cache_keys, callback=invalidate_callback + ) + + results = {cache_key_to_arg(key): v for key, v in immediate_results.items()} + + cached_defers: List["defer.Deferred[Any]"] = [] + if pending_deferred: + + def update_results(r: Dict) -> None: + for k, v in r.items(): + results[cache_key_to_arg(k)] = v + + pending_deferred.addCallback(update_results) + cached_defers.append(pending_deferred) if missing: - # we need a deferred for each entry in the list, - # which we put in the cache. Each deferred resolves with the - # relevant result for that key. - deferreds_map = {} - for arg in missing: - deferred: "defer.Deferred[Any]" = defer.Deferred() - deferreds_map[arg] = deferred - key = arg_to_cache_key(arg) - cached_defers.append( - cache.set(key, deferred, callback=invalidate_callback) - ) + cache_entry = cache.start_bulk_input(missing, invalidate_callback) def complete_all(res: Dict[Hashable, Any]) -> None: - # the wrapped function has completed. It returns a dict. - # We can now update our own result map, and then resolve the - # observable deferreds in the cache. - for e, d1 in deferreds_map.items(): - val = res.get(e, None) - # make sure we update the results map before running the - # deferreds, because as soon as we run the last deferred, the - # gatherResults() below will complete and return the result - # dict to our caller. - results[e] = val - d1.callback(val) + missing_results = {} + for key in missing: + arg = cache_key_to_arg(key) + val = res.get(arg, None) + + results[arg] = val + missing_results[key] = val + + cache_entry.complete_bulk(cache, missing_results) def errback_all(f: Failure) -> None: - # the wrapped function has failed. Propagate the failure into - # the cache, which will invalidate the entry, and cause the - # relevant cached_deferreds to fail, which will propagate the - # failure to our caller. 
- for d1 in deferreds_map.values(): - d1.errback(f) + cache_entry.error_bulk(cache, missing, f) args_to_call = dict(arg_dict) - args_to_call[self.list_name] = missing + args_to_call[self.list_name] = { + cache_key_to_arg(key) for key in missing + } # dispatch the call, and attach the two handlers - defer.maybeDeferred( + missing_d = defer.maybeDeferred( preserve_fn(self.orig), **args_to_call ).addCallbacks(complete_all, errback_all) + cached_defers.append(missing_d) if cached_defers: d = defer.gatherResults(cached_defers, consumeErrors=True).addCallbacks( diff --git a/synapse/util/caches/treecache.py b/synapse/util/caches/treecache.py index c1b8ec0c73..fec31da2b6 100644 --- a/synapse/util/caches/treecache.py +++ b/synapse/util/caches/treecache.py @@ -135,6 +135,9 @@ class TreeCache: def values(self): return iterate_tree_cache_entry(self.root) + def items(self): + return iterate_tree_cache_items((), self.root) + def __len__(self) -> int: return self.size -- cgit 1.5.1 From 7af07f9716e6b4a2238ecd435e19bf8501360bc7 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 23 Aug 2022 21:53:37 -0500 Subject: Instrument `_check_sigs_and_hash_and_fetch` to trace time spent in child concurrent calls (#13588) Instrument `_check_sigs_and_hash_and_fetch` to trace time spent in child concurrent calls because I've see `_check_sigs_and_hash_and_fetch` take [10.41s to process 100 events](https://github.com/matrix-org/synapse/issues/13587) Fix https://github.com/matrix-org/synapse/issues/13587 Part of https://github.com/matrix-org/synapse/issues/13356 --- changelog.d/13588.misc | 1 + synapse/crypto/event_signing.py | 2 ++ synapse/events/spamcheck.py | 2 ++ synapse/federation/federation_base.py | 22 ++++++++++++++++++++++ synapse/federation/federation_client.py | 23 ++++++++++++++++++++--- 5 files changed, 47 insertions(+), 3 deletions(-) create mode 100644 changelog.d/13588.misc (limited to 'synapse') diff --git a/changelog.d/13588.misc b/changelog.d/13588.misc new file mode 100644 index 0000000000..eca1416ceb --- /dev/null +++ b/changelog.d/13588.misc @@ -0,0 +1 @@ +Instrument `_check_sigs_and_hash_and_fetch` to trace time spent in child concurrent calls for understandable traces in Jaeger. 
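
As an aside on what this commit is after: when `_check_sigs_and_hash_and_fetch` fans out over hundreds of events concurrently, a single parent span hides where the ~10 seconds actually go, so the per-event helper is decorated to make each child call visible in Jaeger. A minimal standalone sketch of that idea follows; it is not part of the patch and not Synapse's tracing machinery — the `timed` decorator and `check_one` coroutine are hypothetical stand-ins for `@trace` and `_check_sigs_and_hash_and_fetch_one`, using only the standard library so it runs on its own.

    import asyncio
    import time
    from typing import Any, Awaitable, Callable, List, TypeVar

    T = TypeVar("T")


    def timed(func: Callable[..., Awaitable[T]]) -> Callable[..., Awaitable[T]]:
        """Toy stand-in for a tracing decorator: report how long each call took.

        A real tracer (such as the `@trace` decorator used in this commit) would
        open a child span instead of printing, so each concurrent child call
        shows up individually in the trace view.
        """

        async def wrapper(*args: Any, **kwargs: Any) -> T:
            start = time.monotonic()
            try:
                return await func(*args, **kwargs)
            finally:
                print(f"{func.__name__}{args!r} took {time.monotonic() - start:.3f}s")

        return wrapper


    @timed
    async def check_one(event_id: str) -> str:
        # Hypothetical per-event check standing in for signature/hash validation.
        await asyncio.sleep(0.1)
        return event_id


    async def check_many(event_ids: List[str]) -> List[str]:
        # Fan out over the events concurrently; because the per-event helper is
        # decorated, each child call gets its own timing/span rather than being
        # lumped into one opaque parent.
        return await asyncio.gather(*(check_one(e) for e in event_ids))


    if __name__ == "__main__":
        asyncio.run(check_many(["$event_a", "$event_b", "$event_c"]))

In the real change, the equivalent decoration of `_check_sigs_and_hash_and_fetch_one` (together with the `@tag_args` and `log_kv` annotations visible in the diffs below) is what splits the single long-running span into per-event child spans.
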
diff --git a/synapse/crypto/event_signing.py b/synapse/crypto/event_signing.py index 7520647d1e..23b799ac32 100644 --- a/synapse/crypto/event_signing.py +++ b/synapse/crypto/event_signing.py @@ -28,6 +28,7 @@ from synapse.api.errors import Codes, SynapseError from synapse.api.room_versions import RoomVersion from synapse.events import EventBase from synapse.events.utils import prune_event, prune_event_dict +from synapse.logging.opentracing import trace from synapse.types import JsonDict logger = logging.getLogger(__name__) @@ -35,6 +36,7 @@ logger = logging.getLogger(__name__) Hasher = Callable[[bytes], "hashlib._Hash"] +@trace def check_event_content_hash( event: EventBase, hash_algorithm: Hasher = hashlib.sha256 ) -> bool: diff --git a/synapse/events/spamcheck.py b/synapse/events/spamcheck.py index 4a3bfb38f1..623a2c71ea 100644 --- a/synapse/events/spamcheck.py +++ b/synapse/events/spamcheck.py @@ -32,6 +32,7 @@ from typing_extensions import Literal import synapse from synapse.api.errors import Codes +from synapse.logging.opentracing import trace from synapse.rest.media.v1._base import FileInfo from synapse.rest.media.v1.media_storage import ReadableFileWrapper from synapse.spam_checker_api import RegistrationBehaviour @@ -378,6 +379,7 @@ class SpamChecker: if check_media_file_for_spam is not None: self._check_media_file_for_spam_callbacks.append(check_media_file_for_spam) + @trace async def check_event_for_spam( self, event: "synapse.events.EventBase" ) -> Union[Tuple[Codes, JsonDict], str]: diff --git a/synapse/federation/federation_base.py b/synapse/federation/federation_base.py index 2522bf78fc..4269a98db2 100644 --- a/synapse/federation/federation_base.py +++ b/synapse/federation/federation_base.py @@ -23,6 +23,7 @@ from synapse.crypto.keyring import Keyring from synapse.events import EventBase, make_event_from_dict from synapse.events.utils import prune_event, validate_canonicaljson from synapse.http.servlet import assert_params_in_dict +from synapse.logging.opentracing import log_kv, trace from synapse.types import JsonDict, get_domain_from_id if TYPE_CHECKING: @@ -55,6 +56,7 @@ class FederationBase: self._clock = hs.get_clock() self._storage_controllers = hs.get_storage_controllers() + @trace async def _check_sigs_and_hash( self, room_version: RoomVersion, pdu: EventBase ) -> EventBase: @@ -97,17 +99,36 @@ class FederationBase: "Event %s seems to have been redacted; using our redacted copy", pdu.event_id, ) + log_kv( + { + "message": "Event seems to have been redacted; using our redacted copy", + "event_id": pdu.event_id, + } + ) else: logger.warning( "Event %s content has been tampered, redacting", pdu.event_id, ) + log_kv( + { + "message": "Event content has been tampered, redacting", + "event_id": pdu.event_id, + } + ) return redacted_event spam_check = await self.spam_checker.check_event_for_spam(pdu) if spam_check != self.spam_checker.NOT_SPAM: logger.warning("Event contains spam, soft-failing %s", pdu.event_id) + log_kv( + { + "message": "Event contains spam, redacting (to save disk space) " + "as well as soft-failing (to stop using the event in prev_events)", + "event_id": pdu.event_id, + } + ) # we redact (to save disk space) as well as soft-failing (to stop # using the event in prev_events). 
redacted_event = prune_event(pdu) @@ -117,6 +138,7 @@ class FederationBase: return pdu +@trace async def _check_sigs_on_pdu( keyring: Keyring, room_version: RoomVersion, pdu: EventBase ) -> None: diff --git a/synapse/federation/federation_client.py b/synapse/federation/federation_client.py index 987f6dad46..7ee2974bb1 100644 --- a/synapse/federation/federation_client.py +++ b/synapse/federation/federation_client.py @@ -61,7 +61,7 @@ from synapse.federation.federation_base import ( ) from synapse.federation.transport.client import SendJoinResponse from synapse.http.types import QueryParams -from synapse.logging.opentracing import SynapseTags, set_tag, tag_args, trace +from synapse.logging.opentracing import SynapseTags, log_kv, set_tag, tag_args, trace from synapse.types import JsonDict, UserID, get_domain_from_id from synapse.util.async_helpers import concurrently_execute from synapse.util.caches.expiringcache import ExpiringCache @@ -587,11 +587,15 @@ class FederationClient(FederationBase): Returns: A list of PDUs that have valid signatures and hashes. """ + set_tag( + SynapseTags.RESULT_PREFIX + "pdus.length", + str(len(pdus)), + ) # We limit how many PDUs we check at once, as if we try to do hundreds # of thousands of PDUs at once we see large memory spikes. - valid_pdus = [] + valid_pdus: List[EventBase] = [] async def _execute(pdu: EventBase) -> None: valid_pdu = await self._check_sigs_and_hash_and_fetch_one( @@ -607,6 +611,8 @@ class FederationClient(FederationBase): return valid_pdus + @trace + @tag_args async def _check_sigs_and_hash_and_fetch_one( self, pdu: EventBase, @@ -639,16 +645,27 @@ class FederationClient(FederationBase): except InvalidEventSignatureError as e: logger.warning( "Signature on retrieved event %s was invalid (%s). " - "Checking local store/orgin server", + "Checking local store/origin server", pdu.event_id, e, ) + log_kv( + { + "message": "Signature on retrieved event was invalid. " + "Checking local store/origin server", + "event_id": pdu.event_id, + "InvalidEventSignatureError": e, + } + ) # Check local db. res = await self.store.get_event( pdu.event_id, allow_rejected=True, allow_none=True ) + # If the PDU fails its signature check and we don't have it in our + # database, we then request it from sender's server (if that is not the + # same as `origin`). pdu_origin = get_domain_from_id(pdu.sender) if not res and pdu_origin != origin: try: -- cgit 1.5.1 From ba882c03579e96a97568fca3551207690b302b30 Mon Sep 17 00:00:00 2001 From: reivilibre Date: Wed, 24 Aug 2022 09:09:59 +0000 Subject: Faster Room Joins: fix `/make_knock` blocking indefinitely when the room in question is a partial-stated room. (#13583) Co-authored-by: Sean Quah <8349537+squahtx@users.noreply.github.com> --- changelog.d/13583.bugfix | 1 + synapse/federation/federation_server.py | 11 +++++++++++ 2 files changed, 12 insertions(+) create mode 100644 changelog.d/13583.bugfix (limited to 'synapse') diff --git a/changelog.d/13583.bugfix b/changelog.d/13583.bugfix new file mode 100644 index 0000000000..1e4ce5904b --- /dev/null +++ b/changelog.d/13583.bugfix @@ -0,0 +1 @@ +Faster Room Joins: fix `/make_knock` blocking indefinitely when the room in question is a partial-stated room. 
\ No newline at end of file diff --git a/synapse/federation/federation_server.py b/synapse/federation/federation_server.py index 75fbc6073d..3bf84cf625 100644 --- a/synapse/federation/federation_server.py +++ b/synapse/federation/federation_server.py @@ -763,6 +763,17 @@ class FederationServer(FederationBase): The partial knock event. """ origin_host, _ = parse_server_name(origin) + + if await self.store.is_partial_state_room(room_id): + # Before we do anything: check if the room is partial-stated. + # Note that at the time this check was added, `on_make_knock_request` would + # block due to https://github.com/matrix-org/synapse/issues/12997. + raise SynapseError( + 404, + "Unable to handle /make_knock right now; this server is not fully joined.", + errcode=Codes.NOT_FOUND, + ) + await self.check_server_matches_acl(origin_host, room_id) room_version = await self.store.get_room_version(room_id) -- cgit 1.5.1 From b687010f895ac54323a2fea96cd7720a6dfb99cd Mon Sep 17 00:00:00 2001 From: Nick Mills-Barrett Date: Wed, 24 Aug 2022 10:12:51 +0100 Subject: Rewrite get push actions queries (#13597) --- changelog.d/13597.misc | 1 + .../storage/databases/main/event_push_actions.py | 228 ++++++--------------- 2 files changed, 69 insertions(+), 160 deletions(-) create mode 100644 changelog.d/13597.misc (limited to 'synapse') diff --git a/changelog.d/13597.misc b/changelog.d/13597.misc new file mode 100644 index 0000000000..eb5e971008 --- /dev/null +++ b/changelog.d/13597.misc @@ -0,0 +1 @@ + Optimise push action fetching queries. Contributed by Nick @ Beeper (@fizzadar). diff --git a/synapse/storage/databases/main/event_push_actions.py b/synapse/storage/databases/main/event_push_actions.py index eabf9c9739..8dfa545c27 100644 --- a/synapse/storage/databases/main/event_push_actions.py +++ b/synapse/storage/databases/main/event_push_actions.py @@ -459,6 +459,32 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas return await self.db_pool.runInteraction("get_push_action_users_in_range", f) + def _get_receipts_by_room_txn( + self, txn: LoggingTransaction, user_id: str + ) -> List[Tuple[str, int]]: + receipt_types_clause, args = make_in_list_sql_clause( + self.database_engine, + "receipt_type", + ( + ReceiptTypes.READ, + ReceiptTypes.READ_PRIVATE, + ReceiptTypes.UNSTABLE_READ_PRIVATE, + ), + ) + + sql = f""" + SELECT room_id, MAX(stream_ordering) + FROM receipts_linearized + INNER JOIN events USING (room_id, event_id) + WHERE {receipt_types_clause} + AND user_id = ? + GROUP BY room_id + """ + + args.extend((user_id,)) + txn.execute(sql, args) + return cast(List[Tuple[str, int]], txn.fetchall()) + async def get_unread_push_actions_for_user_in_range_for_http( self, user_id: str, @@ -482,106 +508,45 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas The list will have between 0~limit entries. 
""" - # find rooms that have a read receipt in them and return the next - # push actions - def get_after_receipt( - txn: LoggingTransaction, - ) -> List[Tuple[str, str, int, str, bool]]: - # find rooms that have a read receipt in them and return the next - # push actions - - receipt_types_clause, args = make_in_list_sql_clause( - self.database_engine, - "receipt_type", - ( - ReceiptTypes.READ, - ReceiptTypes.READ_PRIVATE, - ReceiptTypes.UNSTABLE_READ_PRIVATE, - ), - ) - - sql = f""" - SELECT ep.event_id, ep.room_id, ep.stream_ordering, ep.actions, - ep.highlight - FROM ( - SELECT room_id, - MAX(stream_ordering) as stream_ordering - FROM events - INNER JOIN receipts_linearized USING (room_id, event_id) - WHERE {receipt_types_clause} AND user_id = ? - GROUP BY room_id - ) AS rl, - event_push_actions AS ep - WHERE - ep.room_id = rl.room_id - AND ep.stream_ordering > rl.stream_ordering - AND ep.user_id = ? - AND ep.stream_ordering > ? - AND ep.stream_ordering <= ? - AND ep.notif = 1 - ORDER BY ep.stream_ordering ASC LIMIT ? - """ - args.extend( - (user_id, user_id, min_stream_ordering, max_stream_ordering, limit) - ) - txn.execute(sql, args) - return cast(List[Tuple[str, str, int, str, bool]], txn.fetchall()) - - after_read_receipt = await self.db_pool.runInteraction( - "get_unread_push_actions_for_user_in_range_http_arr", get_after_receipt + receipts_by_room = dict( + await self.db_pool.runInteraction( + "get_unread_push_actions_for_user_in_range_http_receipts", + self._get_receipts_by_room_txn, + user_id=user_id, + ), ) - # There are rooms with push actions in them but you don't have a read receipt in - # them e.g. rooms you've been invited to, so get push actions for rooms which do - # not have read receipts in them too. - def get_no_receipt( + def get_push_actions_txn( txn: LoggingTransaction, ) -> List[Tuple[str, str, int, str, bool]]: - receipt_types_clause, args = make_in_list_sql_clause( - self.database_engine, - "receipt_type", - ( - ReceiptTypes.READ, - ReceiptTypes.READ_PRIVATE, - ReceiptTypes.UNSTABLE_READ_PRIVATE, - ), - ) - - sql = f""" - SELECT ep.event_id, ep.room_id, ep.stream_ordering, ep.actions, - ep.highlight + sql = """ + SELECT ep.event_id, ep.room_id, ep.stream_ordering, ep.actions, ep.highlight FROM event_push_actions AS ep - INNER JOIN events AS e USING (room_id, event_id) WHERE - ep.room_id NOT IN ( - SELECT room_id FROM receipts_linearized - WHERE {receipt_types_clause} AND user_id = ? - GROUP BY room_id - ) - AND ep.user_id = ? + ep.user_id = ? AND ep.stream_ordering > ? AND ep.stream_ordering <= ? AND ep.notif = 1 ORDER BY ep.stream_ordering ASC LIMIT ? 
""" - args.extend( - (user_id, user_id, min_stream_ordering, max_stream_ordering, limit) - ) - txn.execute(sql, args) + txn.execute(sql, (user_id, min_stream_ordering, max_stream_ordering, limit)) return cast(List[Tuple[str, str, int, str, bool]], txn.fetchall()) - no_read_receipt = await self.db_pool.runInteraction( - "get_unread_push_actions_for_user_in_range_http_nrr", get_no_receipt + push_actions = await self.db_pool.runInteraction( + "get_unread_push_actions_for_user_in_range_http", get_push_actions_txn ) notifs = [ HttpPushAction( - event_id=row[0], - room_id=row[1], - stream_ordering=row[2], - actions=_deserialize_action(row[3], row[4]), + event_id=event_id, + room_id=room_id, + stream_ordering=stream_ordering, + actions=_deserialize_action(actions, highlight), ) - for row in after_read_receipt + no_read_receipt + for event_id, room_id, stream_ordering, actions, highlight in push_actions + # Only include push actions with a stream ordering after any receipt, or without any + # receipt present (invited to but never read rooms). + if stream_ordering > receipts_by_room.get(room_id, 0) ] # Now sort it so it's ordered correctly, since currently it will @@ -617,106 +582,49 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas The list will have between 0~limit entries. """ - # find rooms that have a read receipt in them and return the most recent - # push actions - def get_after_receipt( - txn: LoggingTransaction, - ) -> List[Tuple[str, str, int, str, bool, int]]: - receipt_types_clause, args = make_in_list_sql_clause( - self.database_engine, - "receipt_type", - ( - ReceiptTypes.READ, - ReceiptTypes.READ_PRIVATE, - ReceiptTypes.UNSTABLE_READ_PRIVATE, - ), - ) - - sql = f""" - SELECT ep.event_id, ep.room_id, ep.stream_ordering, ep.actions, - ep.highlight, e.received_ts - FROM ( - SELECT room_id, - MAX(stream_ordering) as stream_ordering - FROM events - INNER JOIN receipts_linearized USING (room_id, event_id) - WHERE {receipt_types_clause} AND user_id = ? - GROUP BY room_id - ) AS rl, - event_push_actions AS ep - INNER JOIN events AS e USING (room_id, event_id) - WHERE - ep.room_id = rl.room_id - AND ep.stream_ordering > rl.stream_ordering - AND ep.user_id = ? - AND ep.stream_ordering > ? - AND ep.stream_ordering <= ? - AND ep.notif = 1 - ORDER BY ep.stream_ordering DESC LIMIT ? - """ - args.extend( - (user_id, user_id, min_stream_ordering, max_stream_ordering, limit) - ) - txn.execute(sql, args) - return cast(List[Tuple[str, str, int, str, bool, int]], txn.fetchall()) - - after_read_receipt = await self.db_pool.runInteraction( - "get_unread_push_actions_for_user_in_range_email_arr", get_after_receipt + receipts_by_room = dict( + await self.db_pool.runInteraction( + "get_unread_push_actions_for_user_in_range_email_receipts", + self._get_receipts_by_room_txn, + user_id=user_id, + ), ) - # There are rooms with push actions in them but you don't have a read receipt in - # them e.g. rooms you've been invited to, so get push actions for rooms which do - # not have read receipts in them too. 
- def get_no_receipt( + def get_push_actions_txn( txn: LoggingTransaction, ) -> List[Tuple[str, str, int, str, bool, int]]: - receipt_types_clause, args = make_in_list_sql_clause( - self.database_engine, - "receipt_type", - ( - ReceiptTypes.READ, - ReceiptTypes.READ_PRIVATE, - ReceiptTypes.UNSTABLE_READ_PRIVATE, - ), - ) - - sql = f""" + sql = """ SELECT ep.event_id, ep.room_id, ep.stream_ordering, ep.actions, ep.highlight, e.received_ts FROM event_push_actions AS ep INNER JOIN events AS e USING (room_id, event_id) WHERE - ep.room_id NOT IN ( - SELECT room_id FROM receipts_linearized - WHERE {receipt_types_clause} AND user_id = ? - GROUP BY room_id - ) - AND ep.user_id = ? + ep.user_id = ? AND ep.stream_ordering > ? AND ep.stream_ordering <= ? AND ep.notif = 1 ORDER BY ep.stream_ordering DESC LIMIT ? """ - args.extend( - (user_id, user_id, min_stream_ordering, max_stream_ordering, limit) - ) - txn.execute(sql, args) + txn.execute(sql, (user_id, min_stream_ordering, max_stream_ordering, limit)) return cast(List[Tuple[str, str, int, str, bool, int]], txn.fetchall()) - no_read_receipt = await self.db_pool.runInteraction( - "get_unread_push_actions_for_user_in_range_email_nrr", get_no_receipt + push_actions = await self.db_pool.runInteraction( + "get_unread_push_actions_for_user_in_range_email", get_push_actions_txn ) # Make a list of dicts from the two sets of results. notifs = [ EmailPushAction( - event_id=row[0], - room_id=row[1], - stream_ordering=row[2], - actions=_deserialize_action(row[3], row[4]), - received_ts=row[5], + event_id=event_id, + room_id=room_id, + stream_ordering=stream_ordering, + actions=_deserialize_action(actions, highlight), + received_ts=received_ts, ) - for row in after_read_receipt + no_read_receipt + for event_id, room_id, stream_ordering, actions, highlight, received_ts in push_actions + # Only include push actions with a stream ordering after any receipt, or without any + # receipt present (invited to but never read rooms). + if stream_ordering > receipts_by_room.get(room_id, 0) ] # Now sort it so it's ordered correctly, since currently it will -- cgit 1.5.1 From be4250c7a888e314e361df42042bfa344ab65d55 Mon Sep 17 00:00:00 2001 From: reivilibre Date: Wed, 24 Aug 2022 11:35:54 +0000 Subject: Add experimental configuration option to allow disabling legacy Prometheus metric names. (#13540) Co-authored-by: David Robertson --- changelog.d/13540.misc | 1 + synapse/app/_base.py | 39 ++++- synapse/app/generic_worker.py | 6 +- synapse/app/homeserver.py | 6 +- synapse/config/metrics.py | 29 ++++ synapse/metrics/__init__.py | 4 +- synapse/metrics/_exposition.py | 262 ------------------------------- synapse/metrics/_legacy_exposition.py | 284 ++++++++++++++++++++++++++++++++++ synapse/util/caches/__init__.py | 16 +- tests/test_metrics.py | 36 +++++ 10 files changed, 406 insertions(+), 277 deletions(-) create mode 100644 changelog.d/13540.misc delete mode 100644 synapse/metrics/_exposition.py create mode 100644 synapse/metrics/_legacy_exposition.py (limited to 'synapse') diff --git a/changelog.d/13540.misc b/changelog.d/13540.misc new file mode 100644 index 0000000000..07ace50b12 --- /dev/null +++ b/changelog.d/13540.misc @@ -0,0 +1 @@ +Add experimental configuration option to allow disabling legacy Prometheus metric names. 
\ No newline at end of file diff --git a/synapse/app/_base.py b/synapse/app/_base.py index 923891ae0d..4742435d3b 100644 --- a/synapse/app/_base.py +++ b/synapse/app/_base.py @@ -266,15 +266,48 @@ def register_start( reactor.callWhenRunning(lambda: defer.ensureDeferred(wrapper())) -def listen_metrics(bind_addresses: Iterable[str], port: int) -> None: +def listen_metrics( + bind_addresses: Iterable[str], port: int, enable_legacy_metric_names: bool +) -> None: """ Start Prometheus metrics server. """ - from synapse.metrics import RegistryProxy, start_http_server + from prometheus_client import start_http_server as start_http_server_prometheus + + from synapse.metrics import ( + RegistryProxy, + start_http_server as start_http_server_legacy, + ) for host in bind_addresses: logger.info("Starting metrics listener on %s:%d", host, port) - start_http_server(port, addr=host, registry=RegistryProxy) + if enable_legacy_metric_names: + start_http_server_legacy(port, addr=host, registry=RegistryProxy) + else: + _set_prometheus_client_use_created_metrics(False) + start_http_server_prometheus(port, addr=host, registry=RegistryProxy) + + +def _set_prometheus_client_use_created_metrics(new_value: bool) -> None: + """ + Sets whether prometheus_client should expose `_created`-suffixed metrics for + all gauges, histograms and summaries. + There is no programmatic way to disable this without poking at internals; + the proper way is to use an environment variable which prometheus_client + loads at import time. + + The motivation for disabling these `_created` metrics is that they're + a waste of space as they're not useful but they take up space in Prometheus. + """ + + import prometheus_client.metrics + + if hasattr(prometheus_client.metrics, "_use_created"): + prometheus_client.metrics._use_created = new_value + else: + logger.error( + "Can't disable `_created` metrics in prometheus_client (brittle hack broken?)" + ) def listen_manhole( diff --git a/synapse/app/generic_worker.py b/synapse/app/generic_worker.py index 30e21d9707..5e3825fca6 100644 --- a/synapse/app/generic_worker.py +++ b/synapse/app/generic_worker.py @@ -412,7 +412,11 @@ class GenericWorkerServer(HomeServer): "enable_metrics is not True!" ) else: - _base.listen_metrics(listener.bind_addresses, listener.port) + _base.listen_metrics( + listener.bind_addresses, + listener.port, + enable_legacy_metric_names=self.config.metrics.enable_legacy_metrics, + ) else: logger.warning("Unsupported listener type: %s", listener.type) diff --git a/synapse/app/homeserver.py b/synapse/app/homeserver.py index 68993d91a9..e57a926032 100644 --- a/synapse/app/homeserver.py +++ b/synapse/app/homeserver.py @@ -307,7 +307,11 @@ class SynapseHomeServer(HomeServer): "enable_metrics is not True!" 
) else: - _base.listen_metrics(listener.bind_addresses, listener.port) + _base.listen_metrics( + listener.bind_addresses, + listener.port, + enable_legacy_metric_names=self.config.metrics.enable_legacy_metrics, + ) else: # this shouldn't happen, as the listener type should have been checked # during parsing diff --git a/synapse/config/metrics.py b/synapse/config/metrics.py index 3b42be5b5b..f3134834e5 100644 --- a/synapse/config/metrics.py +++ b/synapse/config/metrics.py @@ -42,6 +42,35 @@ class MetricsConfig(Config): def read_config(self, config: JsonDict, **kwargs: Any) -> None: self.enable_metrics = config.get("enable_metrics", False) + + """ + ### `enable_legacy_metrics` (experimental) + + **Experimental: this option may be removed or have its behaviour + changed at any time, with no notice.** + + Set to `true` to publish both legacy and non-legacy Prometheus metric names, + or to `false` to only publish non-legacy Prometheus metric names. + Defaults to `true`. Has no effect if `enable_metrics` is `false`. + + Legacy metric names include: + - metrics containing colons in the name, such as `synapse_util_caches_response_cache:hits`, because colons are supposed to be reserved for user-defined recording rules; + - counters that don't end with the `_total` suffix, such as `synapse_federation_client_sent_edus`, therefore not adhering to the OpenMetrics standard. + + These legacy metric names are unconventional and not compliant with OpenMetrics standards. + They are included for backwards compatibility. + + Example configuration: + ```yaml + enable_legacy_metrics: false + ``` + + See https://github.com/matrix-org/synapse/issues/11106 for context. + + *Since v1.67.0.* + """ + self.enable_legacy_metrics = config.get("enable_legacy_metrics", True) + self.report_stats = config.get("report_stats", None) self.report_stats_endpoint = config.get( "report_stats_endpoint", "https://matrix.org/report-usage-stats/push" diff --git a/synapse/metrics/__init__.py b/synapse/metrics/__init__.py index 496fce2ecc..c3d3daf877 100644 --- a/synapse/metrics/__init__.py +++ b/synapse/metrics/__init__.py @@ -46,12 +46,12 @@ from twisted.python.threadpool import ThreadPool # This module is imported for its side effects; flake8 needn't warn that it's unused. import synapse.metrics._reactor_metrics # noqa: F401 -from synapse.metrics._exposition import ( +from synapse.metrics._gc import MIN_TIME_BETWEEN_GCS, install_gc_manager +from synapse.metrics._legacy_exposition import ( MetricsResource, generate_latest, start_http_server, ) -from synapse.metrics._gc import MIN_TIME_BETWEEN_GCS, install_gc_manager from synapse.metrics._types import Collector from synapse.util import SYNAPSE_VERSION diff --git a/synapse/metrics/_exposition.py b/synapse/metrics/_exposition.py deleted file mode 100644 index 353d0a63b6..0000000000 --- a/synapse/metrics/_exposition.py +++ /dev/null @@ -1,262 +0,0 @@ -# Copyright 2015-2019 Prometheus Python Client Developers -# Copyright 2019 Matrix.org Foundation C.I.C. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. - -""" -This code is based off `prometheus_client/exposition.py` from version 0.7.1. - -Due to the renaming of metrics in prometheus_client 0.4.0, this customised -vendoring of the code will emit both the old versions that Synapse dashboards -expect, and the newer "best practice" version of the up-to-date official client. -""" - -import math -import threading -from http.server import BaseHTTPRequestHandler, HTTPServer -from socketserver import ThreadingMixIn -from typing import Any, Dict, List, Type, Union -from urllib.parse import parse_qs, urlparse - -from prometheus_client import REGISTRY, CollectorRegistry -from prometheus_client.core import Sample - -from twisted.web.resource import Resource -from twisted.web.server import Request - -from synapse.util import caches - -CONTENT_TYPE_LATEST = "text/plain; version=0.0.4; charset=utf-8" - - -def floatToGoString(d: Union[int, float]) -> str: - d = float(d) - if d == math.inf: - return "+Inf" - elif d == -math.inf: - return "-Inf" - elif math.isnan(d): - return "NaN" - else: - s = repr(d) - dot = s.find(".") - # Go switches to exponents sooner than Python. - # We only need to care about positive values for le/quantile. - if d > 0 and dot > 6: - mantissa = f"{s[0]}.{s[1:dot]}{s[dot + 1 :]}".rstrip("0.") - return f"{mantissa}e+0{dot - 1}" - return s - - -def sample_line(line: Sample, name: str) -> str: - if line.labels: - labelstr = "{{{0}}}".format( - ",".join( - [ - '{}="{}"'.format( - k, - v.replace("\\", r"\\").replace("\n", r"\n").replace('"', r"\""), - ) - for k, v in sorted(line.labels.items()) - ] - ) - ) - else: - labelstr = "" - timestamp = "" - if line.timestamp is not None: - # Convert to milliseconds. - timestamp = f" {int(float(line.timestamp) * 1000):d}" - return "{}{} {}{}\n".format(name, labelstr, floatToGoString(line.value), timestamp) - - -def generate_latest(registry: CollectorRegistry, emit_help: bool = False) -> bytes: - - # Trigger the cache metrics to be rescraped, which updates the common - # metrics but do not produce metrics themselves - for collector in caches.collectors_by_name.values(): - collector.collect() - - output = [] - - for metric in registry.collect(): - if not metric.samples: - # No samples, don't bother. - continue - - mname = metric.name - mnewname = metric.name - mtype = metric.type - - # OpenMetrics -> Prometheus - if mtype == "counter": - mnewname = mnewname + "_total" - elif mtype == "info": - mtype = "gauge" - mnewname = mnewname + "_info" - elif mtype == "stateset": - mtype = "gauge" - elif mtype == "gaugehistogram": - mtype = "histogram" - elif mtype == "unknown": - mtype = "untyped" - - # Output in the old format for compatibility. - if emit_help: - output.append( - "# HELP {} {}\n".format( - mname, - metric.documentation.replace("\\", r"\\").replace("\n", r"\n"), - ) - ) - output.append(f"# TYPE {mname} {mtype}\n") - - om_samples: Dict[str, List[str]] = {} - for s in metric.samples: - for suffix in ["_created", "_gsum", "_gcount"]: - if s.name == metric.name + suffix: - # OpenMetrics specific sample, put in a gauge at the end. 
- # (these come from gaugehistograms which don't get renamed, - # so no need to faff with mnewname) - om_samples.setdefault(suffix, []).append(sample_line(s, s.name)) - break - else: - newname = s.name.replace(mnewname, mname) - if ":" in newname and newname.endswith("_total"): - newname = newname[: -len("_total")] - output.append(sample_line(s, newname)) - - for suffix, lines in sorted(om_samples.items()): - if emit_help: - output.append( - "# HELP {}{} {}\n".format( - metric.name, - suffix, - metric.documentation.replace("\\", r"\\").replace("\n", r"\n"), - ) - ) - output.append(f"# TYPE {metric.name}{suffix} gauge\n") - output.extend(lines) - - # Get rid of the weird colon things while we're at it - if mtype == "counter": - mnewname = mnewname.replace(":total", "") - mnewname = mnewname.replace(":", "_") - - if mname == mnewname: - continue - - # Also output in the new format, if it's different. - if emit_help: - output.append( - "# HELP {} {}\n".format( - mnewname, - metric.documentation.replace("\\", r"\\").replace("\n", r"\n"), - ) - ) - output.append(f"# TYPE {mnewname} {mtype}\n") - - for s in metric.samples: - # Get rid of the OpenMetrics specific samples (we should already have - # dealt with them above anyway.) - for suffix in ["_created", "_gsum", "_gcount"]: - if s.name == metric.name + suffix: - break - else: - output.append( - sample_line(s, s.name.replace(":total", "").replace(":", "_")) - ) - - return "".join(output).encode("utf-8") - - -class MetricsHandler(BaseHTTPRequestHandler): - """HTTP handler that gives metrics from ``REGISTRY``.""" - - registry = REGISTRY - - def do_GET(self) -> None: - registry = self.registry - params = parse_qs(urlparse(self.path).query) - - if "help" in params: - emit_help = True - else: - emit_help = False - - try: - output = generate_latest(registry, emit_help=emit_help) - except Exception: - self.send_error(500, "error generating metric output") - raise - self.send_response(200) - self.send_header("Content-Type", CONTENT_TYPE_LATEST) - self.send_header("Content-Length", str(len(output))) - self.end_headers() - self.wfile.write(output) - - def log_message(self, format: str, *args: Any) -> None: - """Log nothing.""" - - @classmethod - def factory(cls, registry: CollectorRegistry) -> Type: - """Returns a dynamic MetricsHandler class tied - to the passed registry. - """ - # This implementation relies on MetricsHandler.registry - # (defined above and defaulted to REGISTRY). - - # As we have unicode_literals, we need to create a str() - # object for type(). - cls_name = str(cls.__name__) - MyMetricsHandler = type(cls_name, (cls, object), {"registry": registry}) - return MyMetricsHandler - - -class _ThreadingSimpleServer(ThreadingMixIn, HTTPServer): - """Thread per request HTTP server.""" - - # Make worker threads "fire and forget". Beginning with Python 3.7 this - # prevents a memory leak because ``ThreadingMixIn`` starts to gather all - # non-daemon threads in a list in order to join on them at server close. - # Enabling daemon threads virtually makes ``_ThreadingSimpleServer`` the - # same as Python 3.7's ``ThreadingHTTPServer``. 
- daemon_threads = True - - -def start_http_server( - port: int, addr: str = "", registry: CollectorRegistry = REGISTRY -) -> None: - """Starts an HTTP server for prometheus metrics as a daemon thread""" - CustomMetricsHandler = MetricsHandler.factory(registry) - httpd = _ThreadingSimpleServer((addr, port), CustomMetricsHandler) - t = threading.Thread(target=httpd.serve_forever) - t.daemon = True - t.start() - - -class MetricsResource(Resource): - """ - Twisted ``Resource`` that serves prometheus metrics. - """ - - isLeaf = True - - def __init__(self, registry: CollectorRegistry = REGISTRY): - self.registry = registry - - def render_GET(self, request: Request) -> bytes: - request.setHeader(b"Content-Type", CONTENT_TYPE_LATEST.encode("ascii")) - response = generate_latest(self.registry) - request.setHeader(b"Content-Length", str(len(response))) - return response diff --git a/synapse/metrics/_legacy_exposition.py b/synapse/metrics/_legacy_exposition.py new file mode 100644 index 0000000000..ff640a49af --- /dev/null +++ b/synapse/metrics/_legacy_exposition.py @@ -0,0 +1,284 @@ +# Copyright 2015-2019 Prometheus Python Client Developers +# Copyright 2019 Matrix.org Foundation C.I.C. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +This code is based off `prometheus_client/exposition.py` from version 0.7.1. + +Due to the renaming of metrics in prometheus_client 0.4.0, this customised +vendoring of the code will emit both the old versions that Synapse dashboards +expect, and the newer "best practice" version of the up-to-date official client. +""" + +import math +import threading +from http.server import BaseHTTPRequestHandler, HTTPServer +from socketserver import ThreadingMixIn +from typing import Any, Dict, List, Type, Union +from urllib.parse import parse_qs, urlparse + +from prometheus_client import REGISTRY, CollectorRegistry +from prometheus_client.core import Sample + +from twisted.web.resource import Resource +from twisted.web.server import Request + +from synapse.util import caches + +CONTENT_TYPE_LATEST = "text/plain; version=0.0.4; charset=utf-8" + + +def floatToGoString(d: Union[int, float]) -> str: + d = float(d) + if d == math.inf: + return "+Inf" + elif d == -math.inf: + return "-Inf" + elif math.isnan(d): + return "NaN" + else: + s = repr(d) + dot = s.find(".") + # Go switches to exponents sooner than Python. + # We only need to care about positive values for le/quantile. + if d > 0 and dot > 6: + mantissa = f"{s[0]}.{s[1:dot]}{s[dot + 1 :]}".rstrip("0.") + return f"{mantissa}e+0{dot - 1}" + return s + + +def sample_line(line: Sample, name: str) -> str: + if line.labels: + labelstr = "{{{0}}}".format( + ",".join( + [ + '{}="{}"'.format( + k, + v.replace("\\", r"\\").replace("\n", r"\n").replace('"', r"\""), + ) + for k, v in sorted(line.labels.items()) + ] + ) + ) + else: + labelstr = "" + timestamp = "" + if line.timestamp is not None: + # Convert to milliseconds. 
+ timestamp = f" {int(float(line.timestamp) * 1000):d}" + return "{}{} {}{}\n".format(name, labelstr, floatToGoString(line.value), timestamp) + + +# Mapping from new metric names to legacy metric names. +# We translate these back to their old names when exposing them through our +# legacy vendored exporter. +# Only this legacy exposition module applies these name changes. +LEGACY_METRIC_NAMES = { + "synapse_util_caches_cache_hits": "synapse_util_caches_cache:hits", + "synapse_util_caches_cache_size": "synapse_util_caches_cache:size", + "synapse_util_caches_cache_evicted_size": "synapse_util_caches_cache:evicted_size", + "synapse_util_caches_cache_total": "synapse_util_caches_cache:total", + "synapse_util_caches_response_cache_size": "synapse_util_caches_response_cache:size", + "synapse_util_caches_response_cache_hits": "synapse_util_caches_response_cache:hits", + "synapse_util_caches_response_cache_evicted_size": "synapse_util_caches_response_cache:evicted_size", + "synapse_util_caches_response_cache_total": "synapse_util_caches_response_cache:total", +} + + +def generate_latest(registry: CollectorRegistry, emit_help: bool = False) -> bytes: + """ + Generate metrics in legacy format. Modern metrics are generated directly + by prometheus-client. + """ + + # Trigger the cache metrics to be rescraped, which updates the common + # metrics but do not produce metrics themselves + for collector in caches.collectors_by_name.values(): + collector.collect() + + output = [] + + for metric in registry.collect(): + if not metric.samples: + # No samples, don't bother. + continue + + # Translate to legacy metric name if it has one. + mname = LEGACY_METRIC_NAMES.get(metric.name, metric.name) + mnewname = metric.name + mtype = metric.type + + # OpenMetrics -> Prometheus + if mtype == "counter": + mnewname = mnewname + "_total" + elif mtype == "info": + mtype = "gauge" + mnewname = mnewname + "_info" + elif mtype == "stateset": + mtype = "gauge" + elif mtype == "gaugehistogram": + mtype = "histogram" + elif mtype == "unknown": + mtype = "untyped" + + # Output in the old format for compatibility. + if emit_help: + output.append( + "# HELP {} {}\n".format( + mname, + metric.documentation.replace("\\", r"\\").replace("\n", r"\n"), + ) + ) + output.append(f"# TYPE {mname} {mtype}\n") + + om_samples: Dict[str, List[str]] = {} + for s in metric.samples: + for suffix in ["_created", "_gsum", "_gcount"]: + if s.name == mname + suffix: + # OpenMetrics specific sample, put in a gauge at the end. + # (these come from gaugehistograms which don't get renamed, + # so no need to faff with mnewname) + om_samples.setdefault(suffix, []).append(sample_line(s, s.name)) + break + else: + newname = s.name.replace(mnewname, mname) + if ":" in newname and newname.endswith("_total"): + newname = newname[: -len("_total")] + output.append(sample_line(s, newname)) + + for suffix, lines in sorted(om_samples.items()): + if emit_help: + output.append( + "# HELP {}{} {}\n".format( + mname, + suffix, + metric.documentation.replace("\\", r"\\").replace("\n", r"\n"), + ) + ) + output.append(f"# TYPE {mname}{suffix} gauge\n") + output.extend(lines) + + # Get rid of the weird colon things while we're at it + if mtype == "counter": + mnewname = mnewname.replace(":total", "") + mnewname = mnewname.replace(":", "_") + + if mname == mnewname: + continue + + # Also output in the new format, if it's different. 
+ if emit_help: + output.append( + "# HELP {} {}\n".format( + mnewname, + metric.documentation.replace("\\", r"\\").replace("\n", r"\n"), + ) + ) + output.append(f"# TYPE {mnewname} {mtype}\n") + + for s in metric.samples: + # Get rid of the OpenMetrics specific samples (we should already have + # dealt with them above anyway.) + for suffix in ["_created", "_gsum", "_gcount"]: + if s.name == mname + suffix: + break + else: + sample_name = LEGACY_METRIC_NAMES.get(s.name, s.name) + output.append( + sample_line(s, sample_name.replace(":total", "").replace(":", "_")) + ) + + return "".join(output).encode("utf-8") + + +class MetricsHandler(BaseHTTPRequestHandler): + """HTTP handler that gives metrics from ``REGISTRY``.""" + + registry = REGISTRY + + def do_GET(self) -> None: + registry = self.registry + params = parse_qs(urlparse(self.path).query) + + if "help" in params: + emit_help = True + else: + emit_help = False + + try: + output = generate_latest(registry, emit_help=emit_help) + except Exception: + self.send_error(500, "error generating metric output") + raise + self.send_response(200) + self.send_header("Content-Type", CONTENT_TYPE_LATEST) + self.send_header("Content-Length", str(len(output))) + self.end_headers() + self.wfile.write(output) + + def log_message(self, format: str, *args: Any) -> None: + """Log nothing.""" + + @classmethod + def factory(cls, registry: CollectorRegistry) -> Type: + """Returns a dynamic MetricsHandler class tied + to the passed registry. + """ + # This implementation relies on MetricsHandler.registry + # (defined above and defaulted to REGISTRY). + + # As we have unicode_literals, we need to create a str() + # object for type(). + cls_name = str(cls.__name__) + MyMetricsHandler = type(cls_name, (cls, object), {"registry": registry}) + return MyMetricsHandler + + +class _ThreadingSimpleServer(ThreadingMixIn, HTTPServer): + """Thread per request HTTP server.""" + + # Make worker threads "fire and forget". Beginning with Python 3.7 this + # prevents a memory leak because ``ThreadingMixIn`` starts to gather all + # non-daemon threads in a list in order to join on them at server close. + # Enabling daemon threads virtually makes ``_ThreadingSimpleServer`` the + # same as Python 3.7's ``ThreadingHTTPServer``. + daemon_threads = True + + +def start_http_server( + port: int, addr: str = "", registry: CollectorRegistry = REGISTRY +) -> None: + """Starts an HTTP server for prometheus metrics as a daemon thread""" + CustomMetricsHandler = MetricsHandler.factory(registry) + httpd = _ThreadingSimpleServer((addr, port), CustomMetricsHandler) + t = threading.Thread(target=httpd.serve_forever) + t.daemon = True + t.start() + + +class MetricsResource(Resource): + """ + Twisted ``Resource`` that serves prometheus metrics. 
+ """ + + isLeaf = True + + def __init__(self, registry: CollectorRegistry = REGISTRY): + self.registry = registry + + def render_GET(self, request: Request) -> bytes: + request.setHeader(b"Content-Type", CONTENT_TYPE_LATEST.encode("ascii")) + response = generate_latest(self.registry) + request.setHeader(b"Content-Length", str(len(response))) + return response diff --git a/synapse/util/caches/__init__.py b/synapse/util/caches/__init__.py index 42f6abb5e1..bdf9b0dc8c 100644 --- a/synapse/util/caches/__init__.py +++ b/synapse/util/caches/__init__.py @@ -34,10 +34,10 @@ TRACK_MEMORY_USAGE = False caches_by_name: Dict[str, Sized] = {} collectors_by_name: Dict[str, "CacheMetric"] = {} -cache_size = Gauge("synapse_util_caches_cache:size", "", ["name"]) -cache_hits = Gauge("synapse_util_caches_cache:hits", "", ["name"]) -cache_evicted = Gauge("synapse_util_caches_cache:evicted_size", "", ["name", "reason"]) -cache_total = Gauge("synapse_util_caches_cache:total", "", ["name"]) +cache_size = Gauge("synapse_util_caches_cache_size", "", ["name"]) +cache_hits = Gauge("synapse_util_caches_cache_hits", "", ["name"]) +cache_evicted = Gauge("synapse_util_caches_cache_evicted_size", "", ["name", "reason"]) +cache_total = Gauge("synapse_util_caches_cache_total", "", ["name"]) cache_max_size = Gauge("synapse_util_caches_cache_max_size", "", ["name"]) cache_memory_usage = Gauge( "synapse_util_caches_cache_size_bytes", @@ -45,12 +45,12 @@ cache_memory_usage = Gauge( ["name"], ) -response_cache_size = Gauge("synapse_util_caches_response_cache:size", "", ["name"]) -response_cache_hits = Gauge("synapse_util_caches_response_cache:hits", "", ["name"]) +response_cache_size = Gauge("synapse_util_caches_response_cache_size", "", ["name"]) +response_cache_hits = Gauge("synapse_util_caches_response_cache_hits", "", ["name"]) response_cache_evicted = Gauge( - "synapse_util_caches_response_cache:evicted_size", "", ["name", "reason"] + "synapse_util_caches_response_cache_evicted_size", "", ["name", "reason"] ) -response_cache_total = Gauge("synapse_util_caches_response_cache:total", "", ["name"]) +response_cache_total = Gauge("synapse_util_caches_response_cache_total", "", ["name"]) class EvictionReason(Enum): diff --git a/tests/test_metrics.py b/tests/test_metrics.py index b4574b2ffe..1a70eddc9b 100644 --- a/tests/test_metrics.py +++ b/tests/test_metrics.py @@ -12,7 +12,16 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +try: + from importlib import metadata +except ImportError: + import importlib_metadata as metadata # type: ignore[no-redef] +from unittest.mock import patch + +from pkg_resources import parse_version + +from synapse.app._base import _set_prometheus_client_use_created_metrics from synapse.metrics import REGISTRY, InFlightGauge, generate_latest from synapse.util.caches.deferred_cache import DeferredCache @@ -162,3 +171,30 @@ class CacheMetricsTests(unittest.HomeserverTestCase): self.assertEqual(items["synapse_util_caches_cache_size"], "1.0") self.assertEqual(items["synapse_util_caches_cache_max_size"], "777.0") + + +class PrometheusMetricsHackTestCase(unittest.HomeserverTestCase): + if parse_version(metadata.version("prometheus_client")) < parse_version("0.14.0"): + skip = "prometheus-client too old" + + def test_created_metrics_disabled(self) -> None: + """ + Tests that a brittle hack, to disable `_created` metrics, works. 
+ This involves poking at the internals of prometheus-client. + It's not the end of the world if this doesn't work. + + This test gives us a way to notice if prometheus-client changes + their internals. + """ + import prometheus_client.metrics + + PRIVATE_FLAG_NAME = "_use_created" + + # By default, the pesky `_created` metrics are enabled. + # Check this assumption is still valid. + self.assertTrue(getattr(prometheus_client.metrics, PRIVATE_FLAG_NAME)) + + with patch("prometheus_client.metrics") as mock: + setattr(mock, PRIVATE_FLAG_NAME, True) + _set_prometheus_client_use_created_metrics(False) + self.assertFalse(getattr(mock, PRIVATE_FLAG_NAME, False)) -- cgit 1.5.1 From c807b814ae78b6adb5db6485a3ba7de79e36c826 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 24 Aug 2022 11:14:28 -0500 Subject: Use dedicated `get_local_users_in_room` to find local users when calculating `join_authorised_via_users_server` of a `/make_join` request (#13606) Use dedicated `get_local_users_in_room` to find local users when calculating `join_authorised_via_users_server` ("the authorising user for joining a restricted room") of a `/make_join` request. Found while working on https://github.com/matrix-org/synapse/pull/13575#discussion_r953023755 but it's not related. --- changelog.d/13606.misc | 1 + synapse/handlers/event_auth.py | 9 +++------ 2 files changed, 4 insertions(+), 6 deletions(-) create mode 100644 changelog.d/13606.misc (limited to 'synapse') diff --git a/changelog.d/13606.misc b/changelog.d/13606.misc new file mode 100644 index 0000000000..58a4467798 --- /dev/null +++ b/changelog.d/13606.misc @@ -0,0 +1 @@ +Use dedicated `get_local_users_in_room(room_id)` function to find local users when calculating `join_authorised_via_users_server` of a `/make_join` request. diff --git a/synapse/handlers/event_auth.py b/synapse/handlers/event_auth.py index a2dd9c7efa..c3ddc5d182 100644 --- a/synapse/handlers/event_auth.py +++ b/synapse/handlers/event_auth.py @@ -129,12 +129,9 @@ class EventAuthHandler: else: users = {} - # Find the user with the highest power level. - users_in_room = await self._store.get_users_in_room(room_id) - # Only interested in local users. - local_users_in_room = [ - u for u in users_in_room if get_domain_from_id(u) == self._server_name - ] + # Find the user with the highest power level (only interested in local + # users). + local_users_in_room = await self._store.get_local_users_in_room(room_id) chosen_user = max( local_users_in_room, key=lambda user: users.get(user, users_default_level), -- cgit 1.5.1 From b93bd95e8ab64d27ae26841020f62ee61272a5f2 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 24 Aug 2022 12:53:46 -0500 Subject: When loading current ids, sort by `stream_id` to avoid incorrect overwrite and avoid errors caused by sorting alphabetical instance name which can be `null` (#13585) When loading current ids, sort by stream ID so that we don't want to overwrite the `current_position` of an instance to a lower stream ID than we're actually at ([discussion](https://github.com/matrix-org/synapse/pull/13585#discussion_r951795379)). Previously, it sorted alphabetically by instance name which can be `null` and throw errors but more importantly, accomplishes nothing. 
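As a rough illustration of the sorting change described above (a sketch with made-up values echoing the psql output below, not the actual Synapse code), sorting the `(instance_name, stream_id)` rows by the integer stream ID avoids both comparing a `None` instance name and applying positions out of stream order:

```python
# Hypothetical rows as returned by the stream position queries.
rows = [("worker1", 212), (None, 3), ("master", 211)]

# Old behaviour: a plain tuple sort compares instance_name first, which
# raises TypeError when an instance_name is None.
# rows.sort()

# New behaviour: sort only on stream_id, lowest first, so higher positions
# are handled last and never overwritten by lower ones.
rows.sort(key=lambda row: row[1])
print(rows)  # [(None, 3), ('master', 211), ('worker1', 212)]
```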
Fixes the following startup error which is why I started looking into this area: ``` $ poetry run synapse_homeserver --config-path homeserver.yaml **************************************************************** Error during initialisation: '<' not supported between instances of 'NoneType' and 'str' There may be more information in the logs. **************************************************************** ``` Somehow my database ended up looking like the following, notice the `instance_name` is `null` in the db, and we can't sort `NoneType` things. Another question is why do we see the `instance_name` as `null` sometimes instead of `master` in monolith mode? ``` $ psql synapse synapse=# SELECT * FROM stream_positions; stream_name | instance_name | stream_id -----------------+---------------+----------- account_data | master | 1242 events | master | 1787 to_device | master | 58 presence_stream | master | 485638 receipts | master | 341 backfill | master | -139106 (6 rows) synapse=# SELECT instance_name, stream_id FROM receipts_linearized; instance_name | stream_id ---------------+----------- | 211 | 3 | 4 | 212 | 213 | 224 | 228 | 164 | 313 | 253 | 38 | 321 | 324 | 189 | 192 | 193 | 194 | 195 | 197 | 198 | 275 | 79 | 339 | 340 | 82 | 341 | 84 | 85 | 91 | 119 ``` --- changelog.d/13585.bugfix | 1 + synapse/storage/util/id_generators.py | 13 +++++++++++-- 2 files changed, 12 insertions(+), 2 deletions(-) create mode 100644 changelog.d/13585.bugfix (limited to 'synapse') diff --git a/changelog.d/13585.bugfix b/changelog.d/13585.bugfix new file mode 100644 index 0000000000..664b986c59 --- /dev/null +++ b/changelog.d/13585.bugfix @@ -0,0 +1 @@ +Fix loading the current stream position behind the actual position. diff --git a/synapse/storage/util/id_generators.py b/synapse/storage/util/id_generators.py index 3c13859faa..2dfe4c0b66 100644 --- a/synapse/storage/util/id_generators.py +++ b/synapse/storage/util/id_generators.py @@ -460,8 +460,17 @@ class MultiWriterIdGenerator(AbstractStreamIdGenerator): # Cast safety: this corresponds to the types returned by the query above. rows.extend(cast(Iterable[Tuple[str, int]], cur)) - # Sort so that we handle rows in order for each instance. - rows.sort() + # Sort by stream_id (ascending, lowest -> highest) so that we handle + # rows in order for each instance because we don't want to overwrite + # the current_position of an instance to a lower stream ID than + # we're actually at. + def sort_by_stream_id_key_func(row: Tuple[str, int]) -> int: + (instance, stream_id) = row + # If `stream_id` is ever `None`, we will see a `TypeError: '<' + # not supported between instances of 'NoneType' and 'X'` error. 
+ return stream_id + + rows.sort(key=sort_by_stream_id_key_func) with self._lock: for ( -- cgit 1.5.1 From d58615c82cec5bd866bedcb33e3e2a5d2a961c44 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 24 Aug 2022 14:13:12 -0500 Subject: Directly lookup local membership instead of getting all members in a room first (`get_users_in_room` mis-use) (#13608) See https://github.com/matrix-org/synapse/pull/13575#discussion_r953023755 --- changelog.d/13608.misc | 1 + synapse/handlers/events.py | 9 +++++--- synapse/handlers/message.py | 6 ++++-- synapse/handlers/room.py | 7 +++++-- synapse/handlers/room_member.py | 6 ++++-- synapse/server_notices/server_notices_manager.py | 10 +++++++-- synapse/storage/databases/main/roommember.py | 26 ++++++++++++++++++++++++ tests/rest/client/test_relations.py | 12 +++++------ 8 files changed, 60 insertions(+), 17 deletions(-) create mode 100644 changelog.d/13608.misc (limited to 'synapse') diff --git a/changelog.d/13608.misc b/changelog.d/13608.misc new file mode 100644 index 0000000000..19bcc45e33 --- /dev/null +++ b/changelog.d/13608.misc @@ -0,0 +1 @@ +Refactor `get_users_in_room(room_id)` mis-use to lookup single local user with dedicated `check_local_user_in_room(...)` function. diff --git a/synapse/handlers/events.py b/synapse/handlers/events.py index ac13340d3a..949b69cb41 100644 --- a/synapse/handlers/events.py +++ b/synapse/handlers/events.py @@ -151,7 +151,7 @@ class EventHandler: """Retrieve a single specified event. Args: - user: The user requesting the event + user: The local user requesting the event room_id: The expected room id. We'll return None if the event's room does not match. event_id: The event ID to obtain. @@ -173,8 +173,11 @@ class EventHandler: if not event: return None - users = await self.store.get_users_in_room(event.room_id) - is_peeking = user.to_string() not in users + is_user_in_room = await self.store.check_local_user_in_room( + user_id=user.to_string(), room_id=event.room_id + ) + # The user is peeking if they aren't in the room already + is_peeking = not is_user_in_room filtered = await filter_events_for_client( self._storage_controllers, user.to_string(), [event], is_peeking=is_peeking diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index acd3de06f6..72157d5a36 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -761,8 +761,10 @@ class EventCreationHandler: async def _is_server_notices_room(self, room_id: str) -> bool: if self.config.servernotices.server_notices_mxid is None: return False - user_ids = await self.store.get_users_in_room(room_id) - return self.config.servernotices.server_notices_mxid in user_ids + is_server_notices_room = await self.store.check_local_user_in_room( + user_id=self.config.servernotices.server_notices_mxid, room_id=room_id + ) + return is_server_notices_room async def assert_accepted_privacy_policy(self, requester: Requester) -> None: """Check if a user has accepted the privacy policy diff --git a/synapse/handlers/room.py b/synapse/handlers/room.py index 2bf0ebd025..2fc8264858 100644 --- a/synapse/handlers/room.py +++ b/synapse/handlers/room.py @@ -1284,8 +1284,11 @@ class RoomContextHandler: before_limit = math.floor(limit / 2.0) after_limit = limit - before_limit - users = await self.store.get_users_in_room(room_id) - is_peeking = user.to_string() not in users + is_user_in_room = await self.store.check_local_user_in_room( + user_id=user.to_string(), room_id=room_id + ) + # The user is peeking if they aren't in the room already + is_peeking = 
not is_user_in_room async def filter_evts(events: List[EventBase]) -> List[EventBase]: if use_admin_priviledge: diff --git a/synapse/handlers/room_member.py b/synapse/handlers/room_member.py index 65b9a655d4..709682622f 100644 --- a/synapse/handlers/room_member.py +++ b/synapse/handlers/room_member.py @@ -1620,8 +1620,10 @@ class RoomMemberHandler(metaclass=abc.ABCMeta): async def _is_server_notice_room(self, room_id: str) -> bool: if self._server_notices_mxid is None: return False - user_ids = await self.store.get_users_in_room(room_id) - return self._server_notices_mxid in user_ids + is_server_notices_room = await self.store.check_local_user_in_room( + user_id=self._server_notices_mxid, room_id=room_id + ) + return is_server_notices_room class RoomMemberMasterHandler(RoomMemberHandler): diff --git a/synapse/server_notices/server_notices_manager.py b/synapse/server_notices/server_notices_manager.py index 70d054a8f4..564e3705c2 100644 --- a/synapse/server_notices/server_notices_manager.py +++ b/synapse/server_notices/server_notices_manager.py @@ -102,6 +102,10 @@ class ServerNoticesManager: Returns: The room's ID, or None if no room could be found. """ + # If there is no server notices MXID, then there is no server notices room + if self.server_notices_mxid is None: + return None + rooms = await self._store.get_rooms_for_local_user_where_membership_is( user_id, [Membership.INVITE, Membership.JOIN] ) @@ -111,8 +115,10 @@ class ServerNoticesManager: # be joined. This is kinda deliberate, in that if somebody somehow # manages to invite the system user to a room, that doesn't make it # the server notices room. - user_ids = await self._store.get_users_in_room(room.room_id) - if len(user_ids) <= 2 and self.server_notices_mxid in user_ids: + is_server_notices_room = await self._store.check_local_user_in_room( + user_id=self.server_notices_mxid, room_id=room.room_id + ) + if is_server_notices_room: # we found a room which our user shares with the system notice # user return room.room_id diff --git a/synapse/storage/databases/main/roommember.py b/synapse/storage/databases/main/roommember.py index 046ad3a11c..9e5034b401 100644 --- a/synapse/storage/databases/main/roommember.py +++ b/synapse/storage/databases/main/roommember.py @@ -534,6 +534,32 @@ class RoomMemberWorkerStore(EventsWorkerStore): desc="get_local_users_in_room", ) + async def check_local_user_in_room(self, user_id: str, room_id: str) -> bool: + """ + Check whether a given local user is currently joined to the given room. 
+ + Returns: + A boolean indicating whether the user is currently joined to the room + + Raises: + Exeption when called with a non-local user to this homeserver + """ + if not self.hs.is_mine_id(user_id): + raise Exception( + "Cannot call 'check_local_user_in_room' on " + "non-local user %s" % (user_id,), + ) + + ( + membership, + member_event_id, + ) = await self.get_local_current_membership_for_user_in_room( + user_id=user_id, + room_id=room_id, + ) + + return membership == Membership.JOIN + async def get_local_current_membership_for_user_in_room( self, user_id: str, room_id: str ) -> Tuple[Optional[str], Optional[str]]: diff --git a/tests/rest/client/test_relations.py b/tests/rest/client/test_relations.py index d589f07314..651f4f415d 100644 --- a/tests/rest/client/test_relations.py +++ b/tests/rest/client/test_relations.py @@ -999,7 +999,7 @@ class BundledAggregationsTestCase(BaseRelationsTestCase): bundled_aggregations, ) - self._test_bundled_aggregations(RelationTypes.ANNOTATION, assert_annotations, 6) + self._test_bundled_aggregations(RelationTypes.ANNOTATION, assert_annotations, 7) def test_annotation_to_annotation(self) -> None: """Any relation to an annotation should be ignored.""" @@ -1035,7 +1035,7 @@ class BundledAggregationsTestCase(BaseRelationsTestCase): bundled_aggregations, ) - self._test_bundled_aggregations(RelationTypes.REFERENCE, assert_annotations, 6) + self._test_bundled_aggregations(RelationTypes.REFERENCE, assert_annotations, 7) def test_thread(self) -> None: """ @@ -1080,21 +1080,21 @@ class BundledAggregationsTestCase(BaseRelationsTestCase): # The "user" sent the root event and is making queries for the bundled # aggregations: they have participated. - self._test_bundled_aggregations(RelationTypes.THREAD, _gen_assert(True), 8) + self._test_bundled_aggregations(RelationTypes.THREAD, _gen_assert(True), 9) # The "user2" sent replies in the thread and is making queries for the # bundled aggregations: they have participated. # # Note that this re-uses some cached values, so the total number of # queries is much smaller. self._test_bundled_aggregations( - RelationTypes.THREAD, _gen_assert(True), 2, access_token=self.user2_token + RelationTypes.THREAD, _gen_assert(True), 3, access_token=self.user2_token ) # A user with no interactions with the thread: they have not participated. 
user3_id, user3_token = self._create_user("charlie") self.helper.join(self.room, user=user3_id, tok=user3_token) self._test_bundled_aggregations( - RelationTypes.THREAD, _gen_assert(False), 2, access_token=user3_token + RelationTypes.THREAD, _gen_assert(False), 3, access_token=user3_token ) def test_thread_with_bundled_aggregations_for_latest(self) -> None: @@ -1142,7 +1142,7 @@ class BundledAggregationsTestCase(BaseRelationsTestCase): bundled_aggregations["latest_event"].get("unsigned"), ) - self._test_bundled_aggregations(RelationTypes.THREAD, assert_thread, 8) + self._test_bundled_aggregations(RelationTypes.THREAD, assert_thread, 9) def test_nested_thread(self) -> None: """ -- cgit 1.5.1 From 1a209efdb2a6c51e52dd277de7581099852877ae Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 24 Aug 2022 14:15:37 -0500 Subject: Update `get_users_in_room` mis-use to get hosts with dedicated `get_current_hosts_in_room` (#13605) See https://github.com/matrix-org/synapse/pull/13575#discussion_r953023755 --- changelog.d/13605.misc | 1 + synapse/handlers/device.py | 8 ++++++-- synapse/handlers/directory.py | 12 +++++++----- synapse/handlers/presence.py | 3 +-- synapse/handlers/typing.py | 7 ++++--- tests/federation/test_federation_sender.py | 17 ++++++++++++----- 6 files changed, 31 insertions(+), 17 deletions(-) create mode 100644 changelog.d/13605.misc (limited to 'synapse') diff --git a/changelog.d/13605.misc b/changelog.d/13605.misc new file mode 100644 index 0000000000..88d518383b --- /dev/null +++ b/changelog.d/13605.misc @@ -0,0 +1 @@ +Refactor `get_users_in_room(room_id)` mis-use with dedicated `get_current_hosts_in_room(room_id)` function. diff --git a/synapse/handlers/device.py b/synapse/handlers/device.py index f5c586f657..9c2c3a0e68 100644 --- a/synapse/handlers/device.py +++ b/synapse/handlers/device.py @@ -310,6 +310,7 @@ class DeviceHandler(DeviceWorkerHandler): super().__init__(hs) self.federation_sender = hs.get_federation_sender() + self._storage_controllers = hs.get_storage_controllers() self.device_list_updater = DeviceListUpdater(hs, self) @@ -694,8 +695,11 @@ class DeviceHandler(DeviceWorkerHandler): # Ignore any users that aren't ours if self.hs.is_mine_id(user_id): - joined_user_ids = await self.store.get_users_in_room(room_id) - hosts = {get_domain_from_id(u) for u in joined_user_ids} + hosts = set( + await self._storage_controllers.state.get_current_hosts_in_room( + room_id + ) + ) hosts.discard(self.server_name) # Check if we've already sent this update to some hosts diff --git a/synapse/handlers/directory.py b/synapse/handlers/directory.py index 948f66a94d..7127d5aefc 100644 --- a/synapse/handlers/directory.py +++ b/synapse/handlers/directory.py @@ -30,7 +30,7 @@ from synapse.api.errors import ( from synapse.appservice import ApplicationService from synapse.module_api import NOT_SPAM from synapse.storage.databases.main.directory import RoomAliasMapping -from synapse.types import JsonDict, Requester, RoomAlias, get_domain_from_id +from synapse.types import JsonDict, Requester, RoomAlias if TYPE_CHECKING: from synapse.server import HomeServer @@ -83,8 +83,9 @@ class DirectoryHandler: # TODO(erikj): Add transactions. # TODO(erikj): Check if there is a current association. 
if not servers: - users = await self.store.get_users_in_room(room_id) - servers = {get_domain_from_id(u) for u in users} + servers = await self._storage_controllers.state.get_current_hosts_in_room( + room_id + ) if not servers: raise SynapseError(400, "Failed to get server list") @@ -287,8 +288,9 @@ class DirectoryHandler: Codes.NOT_FOUND, ) - users = await self.store.get_users_in_room(room_id) - extra_servers = {get_domain_from_id(u) for u in users} + extra_servers = await self._storage_controllers.state.get_current_hosts_in_room( + room_id + ) servers_set = set(extra_servers) | set(servers) # If this server is in the list of servers, return it first. diff --git a/synapse/handlers/presence.py b/synapse/handlers/presence.py index 741504ba9f..4e575ffbaa 100644 --- a/synapse/handlers/presence.py +++ b/synapse/handlers/presence.py @@ -2051,8 +2051,7 @@ async def get_interested_remotes( ) for room_id, states in room_ids_to_states.items(): - user_ids = await store.get_users_in_room(room_id) - hosts = {get_domain_from_id(user_id) for user_id in user_ids} + hosts = await store.get_current_hosts_in_room(room_id) for host in hosts: hosts_and_states.setdefault(host, set()).update(states) diff --git a/synapse/handlers/typing.py b/synapse/handlers/typing.py index bcac3372a2..a4cd8b8f0c 100644 --- a/synapse/handlers/typing.py +++ b/synapse/handlers/typing.py @@ -26,7 +26,7 @@ from synapse.metrics.background_process_metrics import ( ) from synapse.replication.tcp.streams import TypingStream from synapse.streams import EventSource -from synapse.types import JsonDict, Requester, StreamKeyType, UserID, get_domain_from_id +from synapse.types import JsonDict, Requester, StreamKeyType, UserID from synapse.util.caches.stream_change_cache import StreamChangeCache from synapse.util.metrics import Measure from synapse.util.wheel_timer import WheelTimer @@ -362,8 +362,9 @@ class TypingWriterHandler(FollowerTypingHandler): ) return - users = await self.store.get_users_in_room(room_id) - domains = {get_domain_from_id(u) for u in users} + domains = await self._storage_controllers.state.get_current_hosts_in_room( + room_id + ) if self.server_name in domains: logger.info("Got typing update from %s: %r", user_id, content) diff --git a/tests/federation/test_federation_sender.py b/tests/federation/test_federation_sender.py index 01a1db6115..a5aa500ef8 100644 --- a/tests/federation/test_federation_sender.py +++ b/tests/federation/test_federation_sender.py @@ -173,17 +173,24 @@ class FederationSenderDevicesTestCases(HomeserverTestCase): return c def prepare(self, reactor, clock, hs): - # stub out `get_rooms_for_user` and `get_users_in_room` so that the + test_room_id = "!room:host1" + + # stub out `get_rooms_for_user` and `get_current_hosts_in_room` so that the # server thinks the user shares a room with `@user2:host2` def get_rooms_for_user(user_id): - return defer.succeed({"!room:host1"}) + return defer.succeed({test_room_id}) hs.get_datastores().main.get_rooms_for_user = get_rooms_for_user - def get_users_in_room(room_id): - return defer.succeed({"@user2:host2"}) + async def get_current_hosts_in_room(room_id): + if room_id == test_room_id: + return ["host2"] + + # TODO: We should fail the test when we encounter an unxpected room ID. + # We can't just use `self.fail(...)` here because the app code is greedy + # with `Exception` and will catch it before the test can see it. 
- hs.get_datastores().main.get_users_in_room = get_users_in_room + hs.get_datastores().main.get_current_hosts_in_room = get_current_hosts_in_room # whenever send_transaction is called, record the edu data self.edus = [] -- cgit 1.5.1 From c406d50d2df3c04e695b826e11c79b3d6326b5ec Mon Sep 17 00:00:00 2001 From: David Robertson Date: Wed, 24 Aug 2022 21:06:31 +0100 Subject: Rename `event_map` to `unpersisted_events` (#13603) --- changelog.d/13603.misc | 1 + synapse/state/v2.py | 69 +++++++++++++++++++++++++++----------------------- 2 files changed, 38 insertions(+), 32 deletions(-) create mode 100644 changelog.d/13603.misc (limited to 'synapse') diff --git a/changelog.d/13603.misc b/changelog.d/13603.misc new file mode 100644 index 0000000000..d08eb6cc0a --- /dev/null +++ b/changelog.d/13603.misc @@ -0,0 +1 @@ +Rename `event_map` to `unpersisted_events` when computing the auth differences. diff --git a/synapse/state/v2.py b/synapse/state/v2.py index cf3045f82e..af03851c71 100644 --- a/synapse/state/v2.py +++ b/synapse/state/v2.py @@ -271,40 +271,41 @@ async def _get_power_level_for_sender( async def _get_auth_chain_difference( room_id: str, state_sets: Sequence[Mapping[Any, str]], - event_map: Dict[str, EventBase], + unpersisted_events: Dict[str, EventBase], state_res_store: StateResolutionStore, ) -> Set[str]: """Compare the auth chains of each state set and return the set of events - that only appear in some but not all of the auth chains. + that only appear in some, but not all of the auth chains. Args: - state_sets - event_map - state_res_store + state_sets: The input state sets we are trying to resolve across. + unpersisted_events: A map from event ID to EventBase containing all unpersisted + events involved in this resolution. + state_res_store: Returns: - Set of event IDs + The auth difference of the given state sets, as a set of event IDs. """ # The `StateResolutionStore.get_auth_chain_difference` function assumes that # all events passed to it (and their auth chains) have been persisted - # previously. This is not the case for any events in the `event_map`, and so - # we need to manually handle those events. + # previously. We need to manually handle any other events that are yet to be + # persisted. # - # We do this by: - # 1. calculating the auth chain difference for the state sets based on the - # events in `event_map` alone - # 2. replacing any events in the state_sets that are also in `event_map` - # with their auth events (recursively), and then calling - # `store.get_auth_chain_difference` as normal - # 3. adding the results of 1 and 2 together. - - # Map from event ID in `event_map` to their auth event IDs, and their auth - # event IDs if they appear in the `event_map`. This is the intersection of - # the event's auth chain with the events in the `event_map` *plus* their + # We do this in three steps: + # 1. Compute the set of unpersisted events belonging to the auth difference. + # 2. Replacing any unpersisted events in the state_sets with their auth events, + # recursively, until the state_sets contain only persisted events. + # Then we call `store.get_auth_chain_difference` as normal, which computes + # the set of persisted events belonging to the auth difference. + # 3. Adding the results of 1 and 2 together. + + # Map from event ID in `unpersisted_events` to their auth event IDs, and their auth + # event IDs if they appear in the `unpersisted_events`. 
This is the intersection of + # the event's auth chain with the events in `unpersisted_events` *plus* their # auth event IDs. events_to_auth_chain: Dict[str, Set[str]] = {} - for event in event_map.values(): + for event in unpersisted_events.values(): chain = {event.event_id} events_to_auth_chain[event.event_id] = chain @@ -312,16 +313,16 @@ async def _get_auth_chain_difference( while to_search: for auth_id in to_search.pop().auth_event_ids(): chain.add(auth_id) - auth_event = event_map.get(auth_id) + auth_event = unpersisted_events.get(auth_id) if auth_event: to_search.append(auth_event) - # We now a) calculate the auth chain difference for the unpersisted events - # and b) work out the state sets to pass to the store. + # We now 1) calculate the auth chain difference for the unpersisted events + # and 2) work out the state sets to pass to the store. # - # Note: If the `event_map` is empty (which is the common case), we can do a + # Note: If there are no `unpersisted_events` (which is the common case), we can do a # much simpler calculation. - if event_map: + if unpersisted_events: # The list of state sets to pass to the store, where each state set is a set # of the event ids making up the state. This is similar to `state_sets`, # except that (a) we only have event ids, not the complete @@ -344,14 +345,18 @@ async def _get_auth_chain_difference( for event_id in state_set.values(): event_chain = events_to_auth_chain.get(event_id) if event_chain is not None: - # We have an event in `event_map`. We add all the auth - # events that it references (that aren't also in `event_map`). - set_ids.update(e for e in event_chain if e not in event_map) + # We have an unpersisted event. We add all the auth + # events that it references which are also unpersisted. + set_ids.update( + e for e in event_chain if e not in unpersisted_events + ) # We also add the full chain of unpersisted event IDs # referenced by this state set, so that we can work out the # auth chain difference of the unpersisted events. 
- unpersisted_ids.update(e for e in event_chain if e in event_map) + unpersisted_ids.update( + e for e in event_chain if e in unpersisted_events + ) else: set_ids.add(event_id) @@ -361,15 +366,15 @@ async def _get_auth_chain_difference( union = unpersisted_set_ids[0].union(*unpersisted_set_ids[1:]) intersection = unpersisted_set_ids[0].intersection(*unpersisted_set_ids[1:]) - difference_from_event_map: Collection[str] = union - intersection + auth_difference_unpersisted_part: Collection[str] = union - intersection else: - difference_from_event_map = () + auth_difference_unpersisted_part = () state_sets_ids = [set(state_set.values()) for state_set in state_sets] difference = await state_res_store.get_auth_chain_difference( room_id, state_sets_ids ) - difference.update(difference_from_event_map) + difference.update(auth_difference_unpersisted_part) return difference -- cgit 1.5.1 From 0bf180cbb43f6e3d489ddaa5d66226b6daeea027 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 24 Aug 2022 18:59:27 -0500 Subject: Comment about a better future where we can get the state diff between two events (#13586) Split off from https://github.com/matrix-org/synapse/pull/13561 Part of https://github.com/matrix-org/synapse/issues/13356 Mentioned in [internal doc](https://docs.google.com/document/d/1lvUoVfYUiy6UaHB6Rb4HicjaJAU40-APue9Q4vzuW3c/edit#bookmark=id.2tvwz3yhcafh) --- changelog.d/13586.misc | 1 + synapse/handlers/federation_event.py | 8 ++++++++ 2 files changed, 9 insertions(+) create mode 100644 changelog.d/13586.misc (limited to 'synapse') diff --git a/changelog.d/13586.misc b/changelog.d/13586.misc new file mode 100644 index 0000000000..daa71470f6 --- /dev/null +++ b/changelog.d/13586.misc @@ -0,0 +1 @@ +Comment about a better future where we can get the state diff between two events. diff --git a/synapse/handlers/federation_event.py b/synapse/handlers/federation_event.py index 048c4111f6..ace7adcffb 100644 --- a/synapse/handlers/federation_event.py +++ b/synapse/handlers/federation_event.py @@ -1041,6 +1041,14 @@ class FederationEventHandler: InvalidResponseError: if the remote homeserver's response contains fields of the wrong type. """ + + # It would be better if we could query the difference from our known + # state to the given `event_id` so the sending server doesn't have to + # send as much and we don't have to process as many events. For example + # in a room like #matrix:matrix.org, we get 200k events (77k state_events, 122k + # auth_events) from this call. + # + # Tracked by https://github.com/matrix-org/synapse/issues/13618 ( state_event_ids, auth_event_ids, -- cgit 1.5.1 From a2ce6144479c9f0400517d505c7288f2ab785a95 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Thu, 25 Aug 2022 15:29:08 +0100 Subject: register_new_matrix_user: read server url from config (#13616) Fixes https://github.com/matrix-org/synapse/issues/3672: `https://localhost:8448` is virtually never right. --- changelog.d/13616.bugfix | 1 + synapse/_scripts/register_new_matrix_user.py | 57 +++++++++++++++++++++++++--- 2 files changed, 52 insertions(+), 6 deletions(-) create mode 100644 changelog.d/13616.bugfix (limited to 'synapse') diff --git a/changelog.d/13616.bugfix b/changelog.d/13616.bugfix new file mode 100644 index 0000000000..f2c48d1d8d --- /dev/null +++ b/changelog.d/13616.bugfix @@ -0,0 +1 @@ +Fix a longstanding bug in `register_new_matrix_user` which meant it was always necessary to explicitly give a server URL. 
diff --git a/synapse/_scripts/register_new_matrix_user.py b/synapse/_scripts/register_new_matrix_user.py index 092601f530..42ae0dbea3 100644 --- a/synapse/_scripts/register_new_matrix_user.py +++ b/synapse/_scripts/register_new_matrix_user.py @@ -20,11 +20,13 @@ import hashlib import hmac import logging import sys -from typing import Callable, Optional +from typing import Any, Callable, Dict, Optional import requests import yaml +_DEFAULT_SERVER_URL = "http://localhost:8008" + def request_registration( user: str, @@ -203,31 +205,74 @@ def main() -> None: parser.add_argument( "server_url", - default="https://localhost:8448", nargs="?", - help="URL to use to talk to the homeserver. Defaults to " - " 'https://localhost:8448'.", + help="URL to use to talk to the homeserver. By default, tries to find a " + "suitable URL from the configuration file. Otherwise, defaults to " + f"'{_DEFAULT_SERVER_URL}'.", ) args = parser.parse_args() if "config" in args and args.config: config = yaml.safe_load(args.config) + + if args.shared_secret: + secret = args.shared_secret + else: + # argparse should check that we have either config or shared secret + assert config + secret = config.get("registration_shared_secret", None) if not secret: print("No 'registration_shared_secret' defined in config.") sys.exit(1) + + if args.server_url: + server_url = args.server_url + elif config: + server_url = _find_client_listener(config) + if not server_url: + server_url = _DEFAULT_SERVER_URL + print( + "Unable to find a suitable HTTP listener in the configuration file. " + f"Trying {server_url} as a last resort.", + file=sys.stderr, + ) else: - secret = args.shared_secret + server_url = _DEFAULT_SERVER_URL + print( + f"No server url or configuration file given. Defaulting to {server_url}.", + file=sys.stderr, + ) admin = None if args.admin or args.no_admin: admin = args.admin register_new_user( - args.user, args.password, args.server_url, secret, admin, args.user_type + args.user, args.password, server_url, secret, admin, args.user_type ) +def _find_client_listener(config: Dict[str, Any]) -> Optional[str]: + # try to find a listener in the config. Returns a host:port pair + for listener in config.get("listeners", []): + if listener.get("type") != "http" or listener.get("tls", False): + continue + + if not any( + name == "client" + for resource in listener.get("resources", []) + for name in resource.get("names", []) + ): + continue + + # TODO: consider bind_addresses + return f"http://localhost:{listener['port']}" + + # no suitable listeners? + return None + + if __name__ == "__main__": main() -- cgit 1.5.1 From d092e6f32a1a3d79337774746720a73762a35e8e Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Thu, 25 Aug 2022 17:27:46 +0100 Subject: Support `registration_shared_secret` in a file (#13614) A new `registration_shared_secret_path` option. This is kinda handy for k8s deployments and things. 
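For example, the new option could be used like this (a sketch — the file path is illustrative, and the file must contain only the shared secret):

```yaml
# homeserver.yaml: read the registration shared secret from a file
# (e.g. a mounted Kubernetes secret) instead of inlining it.
registration_shared_secret_path: /run/secrets/registration_shared_secret
```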
--- changelog.d/13614.feature | 1 + docs/usage/configuration/config_documentation.md | 18 ++++++++++ synapse/_scripts/register_new_matrix_user.py | 45 ++++++++++++++++++++++-- synapse/config/registration.py | 33 +++++++++++++++-- 4 files changed, 92 insertions(+), 5 deletions(-) create mode 100644 changelog.d/13614.feature (limited to 'synapse') diff --git a/changelog.d/13614.feature b/changelog.d/13614.feature new file mode 100644 index 0000000000..fa177ead09 --- /dev/null +++ b/changelog.d/13614.feature @@ -0,0 +1 @@ +Support setting the registration shared secret in a file, via a new `registration_shared_secret_path` configuration option. diff --git a/docs/usage/configuration/config_documentation.md b/docs/usage/configuration/config_documentation.md index 8ae018e628..e9ab58854e 100644 --- a/docs/usage/configuration/config_documentation.md +++ b/docs/usage/configuration/config_documentation.md @@ -2124,10 +2124,28 @@ registration_requires_token: true If set, allows registration of standard or admin accounts by anyone who has the shared secret, even if registration is otherwise disabled. +See also [`registration_shared_secret_path`](#registration_shared_secret_path). + Example configuration: ```yaml registration_shared_secret: ``` + +--- +### `registration_shared_secret_path` + +An alternative to [`registration_shared_secret`](#registration_shared_secret): +allows the shared secret to be specified in an external file. + +The file should be a plain text file, containing only the shared secret. + +Example configuration: +```yaml +registration_shared_secret_file: /path/to/secrets/file +``` + +_Added in Synapse 1.67.0._ + --- ### `bcrypt_rounds` diff --git a/synapse/_scripts/register_new_matrix_user.py b/synapse/_scripts/register_new_matrix_user.py index 42ae0dbea3..0c4504d5d8 100644 --- a/synapse/_scripts/register_new_matrix_user.py +++ b/synapse/_scripts/register_new_matrix_user.py @@ -1,6 +1,6 @@ # Copyright 2015, 2016 OpenMarket Ltd # Copyright 2018 New Vector -# Copyright 2021 The Matrix.org Foundation C.I.C. +# Copyright 2021-22 The Matrix.org Foundation C.I.C. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -25,6 +25,15 @@ from typing import Any, Callable, Dict, Optional import requests import yaml +_CONFLICTING_SHARED_SECRET_OPTS_ERROR = """\ +Conflicting options 'registration_shared_secret' and 'registration_shared_secret_path' +are both defined in config file. +""" + +_NO_SHARED_SECRET_OPTS_ERROR = """\ +No 'registration_shared_secret' or 'registration_shared_secret_path' defined in config. 
+""" + _DEFAULT_SERVER_URL = "http://localhost:8008" @@ -222,9 +231,15 @@ def main() -> None: # argparse should check that we have either config or shared secret assert config - secret = config.get("registration_shared_secret", None) + secret = config.get("registration_shared_secret") + secret_file = config.get("registration_shared_secret_path") + if secret_file: + if secret: + print(_CONFLICTING_SHARED_SECRET_OPTS_ERROR, file=sys.stderr) + sys.exit(1) + secret = _read_file(secret_file, "registration_shared_secret_path").strip() if not secret: - print("No 'registration_shared_secret' defined in config.") + print(_NO_SHARED_SECRET_OPTS_ERROR, file=sys.stderr) sys.exit(1) if args.server_url: @@ -254,6 +269,30 @@ def main() -> None: ) +def _read_file(file_path: Any, config_path: str) -> str: + """Check the given file exists, and read it into a string + + If it does not, exit with an error indicating the problem + + Args: + file_path: the file to be read + config_path: where in the configuration file_path came from, so that a useful + error can be emitted if it does not exist. + Returns: + content of the file. + """ + if not isinstance(file_path, str): + print(f"{config_path} setting is not a string", file=sys.stderr) + sys.exit(1) + + try: + with open(file_path) as file_stream: + return file_stream.read() + except OSError as e: + print(f"Error accessing file {file_path}: {e}", file=sys.stderr) + sys.exit(1) + + def _find_client_listener(config: Dict[str, Any]) -> Optional[str]: # try to find a listener in the config. Returns a host:port pair for listener in config.get("listeners", []): diff --git a/synapse/config/registration.py b/synapse/config/registration.py index a888d976f2..df1d83dfaa 100644 --- a/synapse/config/registration.py +++ b/synapse/config/registration.py @@ -13,10 +13,10 @@ # See the License for the specific language governing permissions and # limitations under the License. import argparse -from typing import Any, Optional +from typing import Any, Dict, Optional from synapse.api.constants import RoomCreationPreset -from synapse.config._base import Config, ConfigError +from synapse.config._base import Config, ConfigError, read_file from synapse.types import JsonDict, RoomAlias, UserID from synapse.util.stringutils import random_string_with_symbols, strtobool @@ -27,6 +27,11 @@ password resets, configure Synapse with an SMTP server via the `email` setting, remove `account_threepid_delegates.email`. """ +CONFLICTING_SHARED_SECRET_OPTS_ERROR = """\ +You have configured both `registration_shared_secret` and +`registration_shared_secret_path`. These are mutually incompatible. 
+""" + class RegistrationConfig(Config): section = "registration" @@ -53,7 +58,16 @@ class RegistrationConfig(Config): self.enable_registration_token_3pid_bypass = config.get( "enable_registration_token_3pid_bypass", False ) + + # read the shared secret, either inline or from an external file self.registration_shared_secret = config.get("registration_shared_secret") + registration_shared_secret_path = config.get("registration_shared_secret_path") + if registration_shared_secret_path: + if self.registration_shared_secret: + raise ConfigError(CONFLICTING_SHARED_SECRET_OPTS_ERROR) + self.registration_shared_secret = read_file( + registration_shared_secret_path, ("registration_shared_secret_path",) + ).strip() self.bcrypt_rounds = config.get("bcrypt_rounds", 12) @@ -218,6 +232,21 @@ class RegistrationConfig(Config): else: return "" + def generate_files(self, config: Dict[str, Any], config_dir_path: str) -> None: + # if 'registration_shared_secret_path' is specified, and the target file + # does not exist, generate it. + registration_shared_secret_path = config.get("registration_shared_secret_path") + if registration_shared_secret_path and not self.path_exists( + registration_shared_secret_path + ): + print( + "Generating registration shared secret file " + + registration_shared_secret_path + ) + secret = random_string_with_symbols(50) + with open(registration_shared_secret_path, "w") as f: + f.write(f"{secret}\n") + @staticmethod def add_arguments(parser: argparse.ArgumentParser) -> None: reg_group = parser.add_argument_group("registration") -- cgit 1.5.1 From 967d7bad6c1ea97fb234c3e6d0cbc87b73bb7d0a Mon Sep 17 00:00:00 2001 From: Brad Murray Date: Fri, 26 Aug 2022 03:38:10 -0400 Subject: Move the execution of the retention purge_jobs to the main worker (#13632) Fixes #9927 Signed-off-by: Brad Murray brad@beeper.com --- changelog.d/13632.bugfix | 1 + synapse/handlers/pagination.py | 6 ++---- 2 files changed, 3 insertions(+), 4 deletions(-) create mode 100644 changelog.d/13632.bugfix (limited to 'synapse') diff --git a/changelog.d/13632.bugfix b/changelog.d/13632.bugfix new file mode 100644 index 0000000000..e4b7b403cd --- /dev/null +++ b/changelog.d/13632.bugfix @@ -0,0 +1 @@ +Fix the running of MSC1763 retention purge_jobs in deployments with background jobs running on a worker by forcing them back onto the main worker. Contributed by Brad @ Beeper. diff --git a/synapse/handlers/pagination.py b/synapse/handlers/pagination.py index 74e944bce7..a0c39778ab 100644 --- a/synapse/handlers/pagination.py +++ b/synapse/handlers/pagination.py @@ -159,11 +159,9 @@ class PaginationHandler: self._retention_allowed_lifetime_max = ( hs.config.retention.retention_allowed_lifetime_max ) + self._is_master = hs.config.worker.worker_app is None - if ( - hs.config.worker.run_background_tasks - and hs.config.retention.retention_enabled - ): + if hs.config.retention.retention_enabled and self._is_master: # Run the purge jobs described in the configuration file. for job in hs.config.retention.retention_purge_jobs: logger.info("Setting up purge job with config: %s", job) -- cgit 1.5.1 From 5e5c8150d798f6929ddedbb39f9f11486558cdbc Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Fri, 26 Aug 2022 12:26:06 +0100 Subject: Generate missing configuration files at startup (#13615) If things like the signing key file are missing, let's just try to generate them on startup. Again, this is useful for k8s-like deployments where we just want to generate keys on the first run. 
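As a concrete (illustrative) example of the new default behaviour: starting Synapse against a config file whose additional files (signing key, shared secret file, etc.) do not exist yet should now generate them and then run, without a separate key-generation step:

```sh
# Sketch: first run against a fresh config — missing additional files are
# generated, then the server starts. This is equivalent to passing the new
# default --generate-missing-and-run flag added below.
python -m synapse.app.homeserver --config-path homeserver.yaml
```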
--- changelog.d/13615.feature | 1 + docs/usage/configuration/config_documentation.md | 8 +++- synapse/config/_base.py | 59 +++++++++++++++++++----- 3 files changed, 56 insertions(+), 12 deletions(-) create mode 100644 changelog.d/13615.feature (limited to 'synapse') diff --git a/changelog.d/13615.feature b/changelog.d/13615.feature new file mode 100644 index 0000000000..c2c568f1eb --- /dev/null +++ b/changelog.d/13615.feature @@ -0,0 +1 @@ +Change the default startup behaviour so that any missing "additional" configuration files (signing key, etc) are generated automatically. diff --git a/docs/usage/configuration/config_documentation.md b/docs/usage/configuration/config_documentation.md index e9ab58854e..4c59e3dcf2 100644 --- a/docs/usage/configuration/config_documentation.md +++ b/docs/usage/configuration/config_documentation.md @@ -2139,6 +2139,9 @@ allows the shared secret to be specified in an external file. The file should be a plain text file, containing only the shared secret. +If this file does not exist, Synapse will create a new signing +key on startup and store it in this file. + Example configuration: ```yaml registration_shared_secret_file: /path/to/secrets/file @@ -2555,7 +2558,10 @@ Config options relating to signing keys --- ### `signing_key_path` -Path to the signing key to sign messages with. +Path to the signing key to sign events and federation requests with. + +*New in Synapse 1.67*: If this file does not exist, Synapse will create a new signing +key on startup and store it in this file. Example configuration: ```yaml diff --git a/synapse/config/_base.py b/synapse/config/_base.py index 7c9cf403ef..1f6362aedd 100644 --- a/synapse/config/_base.py +++ b/synapse/config/_base.py @@ -20,6 +20,7 @@ import logging import os import re from collections import OrderedDict +from enum import Enum, auto from hashlib import sha256 from textwrap import dedent from typing import ( @@ -603,18 +604,44 @@ class RootConfig: " may specify directories containing *.yaml files.", ) - generate_group = parser.add_argument_group("Config generation") - generate_group.add_argument( + # we nest the mutually-exclusive group inside another group so that the help + # text shows them in their own group. + generate_mode_group = parser.add_argument_group( + "Config generation mode", + ) + generate_mode_exclusive = generate_mode_group.add_mutually_exclusive_group() + generate_mode_exclusive.add_argument( + # hidden option to make the type and default work + "--generate-mode", + help=argparse.SUPPRESS, + type=_ConfigGenerateMode, + default=_ConfigGenerateMode.GENERATE_MISSING_AND_RUN, + ) + generate_mode_exclusive.add_argument( "--generate-config", - action="store_true", help="Generate a config file, then exit.", + action="store_const", + const=_ConfigGenerateMode.GENERATE_EVERYTHING_AND_EXIT, + dest="generate_mode", ) - generate_group.add_argument( + generate_mode_exclusive.add_argument( "--generate-missing-configs", "--generate-keys", - action="store_true", help="Generate any missing additional config files, then exit.", + action="store_const", + const=_ConfigGenerateMode.GENERATE_MISSING_AND_EXIT, + dest="generate_mode", ) + generate_mode_exclusive.add_argument( + "--generate-missing-and-run", + help="Generate any missing additional config files, then run. 
This is the " + "default behaviour.", + action="store_const", + const=_ConfigGenerateMode.GENERATE_MISSING_AND_RUN, + dest="generate_mode", + ) + + generate_group = parser.add_argument_group("Details for --generate-config") generate_group.add_argument( "-H", "--server-name", help="The server name to generate a config file for." ) @@ -670,11 +697,12 @@ class RootConfig: config_dir_path = os.path.abspath(config_dir_path) data_dir_path = os.getcwd() - generate_missing_configs = config_args.generate_missing_configs - obj = cls(config_files) - if config_args.generate_config: + if ( + config_args.generate_mode + == _ConfigGenerateMode.GENERATE_EVERYTHING_AND_EXIT + ): if config_args.report_stats is None: parser.error( "Please specify either --report-stats=yes or --report-stats=no\n\n" @@ -732,11 +760,14 @@ class RootConfig: ) % (config_path,) ) - generate_missing_configs = True config_dict = read_config_files(config_files) - if generate_missing_configs: - obj.generate_missing_files(config_dict, config_dir_path) + obj.generate_missing_files(config_dict, config_dir_path) + + if config_args.generate_mode in ( + _ConfigGenerateMode.GENERATE_EVERYTHING_AND_EXIT, + _ConfigGenerateMode.GENERATE_MISSING_AND_EXIT, + ): return None obj.parse_config_dict( @@ -965,6 +996,12 @@ def read_file(file_path: Any, config_path: Iterable[str]) -> str: raise ConfigError("Error accessing file %r" % (file_path,), config_path) from e +class _ConfigGenerateMode(Enum): + GENERATE_MISSING_AND_RUN = auto() + GENERATE_MISSING_AND_EXIT = auto() + GENERATE_EVERYTHING_AND_EXIT = auto() + + __all__ = [ "Config", "RootConfig", -- cgit 1.5.1 From 51d732db3b4ab13eb58e937a546abce7968112ef Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 30 Aug 2022 01:38:14 -0500 Subject: Optimize how we calculate `likely_domains` during backfill (#13575) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Optimize how we calculate `likely_domains` during backfill because I've seen this take 17s in production just to `get_current_state` which is used to `get_domains_from_state` (see case [*2. Loading tons of events* in the `/messages` investigation issue](https://github.com/matrix-org/synapse/issues/13356)). There are 3 ways we currently calculate hosts that are in the room: 1. `get_current_state` -> `get_domains_from_state` - Used in `backfill` to calculate `likely_domains` and `/timestamp_to_event` because it was cargo-culted from `backfill` - This one is being eliminated in favor of `get_current_hosts_in_room` in this PR 🕳 1. `get_current_hosts_in_room` - Used for other federation things like sending read receipts and typing indicators 1. `get_hosts_in_room_at_events` - Used when pushing out events over federation to other servers in the `_process_event_queue_loop` Fix https://github.com/matrix-org/synapse/issues/13626 Part of https://github.com/matrix-org/synapse/issues/13356 Mentioned in [internal doc](https://docs.google.com/document/d/1lvUoVfYUiy6UaHB6Rb4HicjaJAU40-APue9Q4vzuW3c/edit#bookmark=id.2tvwz3yhcafh) ### Query performance #### Before The query from `get_current_state` sucks just because we have to get all 80k events. 
And we see almost the exact same performance locally trying to get all of these events (16s vs 17s): ``` synapse=# SELECT type, state_key, event_id FROM current_state_events WHERE room_id = '!OGEhHVWSdvArJzumhm:matrix.org'; Time: 16035.612 ms (00:16.036) synapse=# SELECT type, state_key, event_id FROM current_state_events WHERE room_id = '!OGEhHVWSdvArJzumhm:matrix.org'; Time: 4243.237 ms (00:04.243) ``` But what about `get_current_hosts_in_room`: When there is 8M rows in the `current_state_events` table, the previous query in `get_current_hosts_in_room` took 13s from complete freshness (when the events were first added). But takes 930ms after a Postgres restart or 390ms if running back to back to back. ```sh $ psql synapse synapse=# \timing on synapse=# SELECT COUNT(DISTINCT substring(state_key FROM '@[^:]*:(.*)$')) FROM current_state_events WHERE type = 'm.room.member' AND membership = 'join' AND room_id = '!OGEhHVWSdvArJzumhm:matrix.org'; count ------- 4130 (1 row) Time: 13181.598 ms (00:13.182) synapse=# SELECT COUNT(*) from current_state_events where room_id = '!OGEhHVWSdvArJzumhm:matrix.org'; count ------- 80814 synapse=# SELECT COUNT(*) from current_state_events; count --------- 8162847 synapse=# SELECT pg_size_pretty( pg_total_relation_size('current_state_events') ); pg_size_pretty ---------------- 4702 MB ``` #### After I'm not sure how long it takes from complete freshness as I only really get that opportunity once (maybe restarting computer but that's cumbersome) and it's not really relevant to normal operating times. Maybe you get closer to the fresh times the more access variability there is so that Postgres caches aren't as exact. Update: The longest I've seen this run for is 6.4s and 4.5s after a computer restart. After a Postgres restart, it takes 330ms and running back to back takes 260ms. ```sh $ psql synapse synapse=# \timing on Timing is on. synapse=# SELECT substring(c.state_key FROM '@[^:]*:(.*)$') as host FROM current_state_events c /* Get the depth of the event from the events table */ INNER JOIN events AS e USING (event_id) WHERE c.type = 'm.room.member' AND c.membership = 'join' AND c.room_id = '!OGEhHVWSdvArJzumhm:matrix.org' GROUP BY host ORDER BY min(e.depth) ASC; Time: 333.800 ms ``` #### Going further To improve things further we could add a `limit` parameter to `get_current_hosts_in_room`. Realistically, we don't need 4k domains to choose from because there is no way we're going to query that many before we a) probably get an answer or b) we give up. Another thing we can do is optimize the query to use a index skip scan: - https://wiki.postgresql.org/wiki/Loose_indexscan - Index Skip Scan, https://commitfest.postgresql.org/37/1741/ - https://www.timescale.com/blog/how-we-made-distinct-queries-up-to-8000x-faster-on-postgresql/ --- changelog.d/13575.misc | 1 + synapse/handlers/federation.py | 53 ++++------------- synapse/handlers/room.py | 14 ++--- synapse/storage/controllers/state.py | 3 +- synapse/storage/databases/main/roommember.py | 88 ++++++++++++++++++++++------ 5 files changed, 89 insertions(+), 70 deletions(-) create mode 100644 changelog.d/13575.misc (limited to 'synapse') diff --git a/changelog.d/13575.misc b/changelog.d/13575.misc new file mode 100644 index 0000000000..3841472617 --- /dev/null +++ b/changelog.d/13575.misc @@ -0,0 +1 @@ +Optimize how Synapse calculates domains to fetch from during backfill. 
diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index e151962055..dd4b9f66d1 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -70,7 +70,7 @@ from synapse.replication.http.federation import ( from synapse.storage.databases.main.events import PartialStateConflictError from synapse.storage.databases.main.events_worker import EventRedactBehaviour from synapse.storage.state import StateFilter -from synapse.types import JsonDict, StateMap, get_domain_from_id +from synapse.types import JsonDict, get_domain_from_id from synapse.util.async_helpers import Linearizer from synapse.util.retryutils import NotRetryingDestination from synapse.visibility import filter_events_for_server @@ -104,37 +104,6 @@ backfill_processing_before_timer = Histogram( ) -def get_domains_from_state(state: StateMap[EventBase]) -> List[Tuple[str, int]]: - """Get joined domains from state - - Args: - state: State map from type/state key to event. - - Returns: - Returns a list of servers with the lowest depth of their joins. - Sorted by lowest depth first. - """ - joined_users = [ - (state_key, int(event.depth)) - for (e_type, state_key), event in state.items() - if e_type == EventTypes.Member and event.membership == Membership.JOIN - ] - - joined_domains: Dict[str, int] = {} - for u, d in joined_users: - try: - dom = get_domain_from_id(u) - old_d = joined_domains.get(dom) - if old_d: - joined_domains[dom] = min(d, old_d) - else: - joined_domains[dom] = d - except Exception: - pass - - return sorted(joined_domains.items(), key=lambda d: d[1]) - - class _BackfillPointType(Enum): # a regular backwards extremity (ie, an event which we don't yet have, but which # is referred to by other events in the DAG) @@ -432,21 +401,19 @@ class FederationHandler: ) # Now we need to decide which hosts to hit first. - - # First we try hosts that are already in the room + # First we try hosts that are already in the room. # TODO: HEURISTIC ALERT. + likely_domains = ( + await self._storage_controllers.state.get_current_hosts_in_room(room_id) + ) - curr_state = await self._storage_controllers.state.get_current_state(room_id) - - curr_domains = get_domains_from_state(curr_state) - - likely_domains = [ - domain for domain, depth in curr_domains if domain != self.server_name - ] - - async def try_backfill(domains: List[str]) -> bool: + async def try_backfill(domains: Collection[str]) -> bool: # TODO: Should we try multiple of these at a time? 
for dom in domains: + # We don't want to ask our own server for information we don't have + if dom == self.server_name: + continue + try: await self._federation_event_handler.backfill( dom, room_id, limit=100, extremities=extremities_to_request diff --git a/synapse/handlers/room.py b/synapse/handlers/room.py index 2fc8264858..f64a8690a5 100644 --- a/synapse/handlers/room.py +++ b/synapse/handlers/room.py @@ -60,7 +60,6 @@ from synapse.event_auth import validate_event_for_room_version from synapse.events import EventBase from synapse.events.utils import copy_and_fixup_power_levels_contents from synapse.federation.federation_client import InvalidResponseError -from synapse.handlers.federation import get_domains_from_state from synapse.handlers.relations import BundledAggregations from synapse.module_api import NOT_SPAM from synapse.rest.admin._base import assert_user_is_admin @@ -1462,17 +1461,16 @@ class TimestampLookupHandler: timestamp, ) - # Find other homeservers from the given state in the room - curr_state = await self._storage_controllers.state.get_current_state( - room_id + likely_domains = ( + await self._storage_controllers.state.get_current_hosts_in_room(room_id) ) - curr_domains = get_domains_from_state(curr_state) - likely_domains = [ - domain for domain, depth in curr_domains if domain != self.server_name - ] # Loop through each homeserver candidate until we get a succesful response for domain in likely_domains: + # We don't want to ask our own server for information we don't have + if domain == self.server_name: + continue + try: remote_response = await self.federation_client.timestamp_to_event( domain, room_id, timestamp, direction diff --git a/synapse/storage/controllers/state.py b/synapse/storage/controllers/state.py index f9ffd0e29e..ba5380ce3e 100644 --- a/synapse/storage/controllers/state.py +++ b/synapse/storage/controllers/state.py @@ -23,7 +23,6 @@ from typing import ( List, Mapping, Optional, - Set, Tuple, ) @@ -520,7 +519,7 @@ class StateStorageController: ) return state_map.get(key) - async def get_current_hosts_in_room(self, room_id: str) -> Set[str]: + async def get_current_hosts_in_room(self, room_id: str) -> List[str]: """Get current hosts in room based on current state.""" await self._partial_state_room_tracker.await_full_state(room_id) diff --git a/synapse/storage/databases/main/roommember.py b/synapse/storage/databases/main/roommember.py index 9e5034b401..06500457bd 100644 --- a/synapse/storage/databases/main/roommember.py +++ b/synapse/storage/databases/main/roommember.py @@ -187,27 +187,48 @@ class RoomMemberWorkerStore(EventsWorkerStore): @cached(max_entries=100000, iterable=True) async def get_users_in_room(self, room_id: str) -> List[str]: + """ + Returns a list of users in the room sorted by longest in the room first + (aka. with the lowest depth). This is done to match the sort in + `get_current_hosts_in_room()` and so we can re-use the cache but it's + not horrible to have here either. + """ + return await self.db_pool.runInteraction( "get_users_in_room", self.get_users_in_room_txn, room_id ) def get_users_in_room_txn(self, txn: LoggingTransaction, room_id: str) -> List[str]: + """ + Returns a list of users in the room sorted by longest in the room first + (aka. with the lowest depth). This is done to match the sort in + `get_current_hosts_in_room()` and so we can re-use the cache but it's + not horrible to have here either. 
+ """ # If we can assume current_state_events.membership is up to date # then we can avoid a join, which is a Very Good Thing given how # frequently this function gets called. if self._current_state_events_membership_up_to_date: sql = """ - SELECT state_key FROM current_state_events - WHERE type = 'm.room.member' AND room_id = ? AND membership = ? + SELECT c.state_key FROM current_state_events as c + /* Get the depth of the event from the events table */ + INNER JOIN events AS e USING (event_id) + WHERE c.type = 'm.room.member' AND c.room_id = ? AND membership = ? + /* Sorted by lowest depth first */ + ORDER BY e.depth ASC; """ else: sql = """ - SELECT state_key FROM room_memberships as m + SELECT c.state_key FROM room_memberships as m + /* Get the depth of the event from the events table */ + INNER JOIN events AS e USING (event_id) INNER JOIN current_state_events as c ON m.event_id = c.event_id AND m.room_id = c.room_id AND m.user_id = c.state_key WHERE c.type = 'm.room.member' AND c.room_id = ? AND m.membership = ? + /* Sorted by lowest depth first */ + ORDER BY e.depth ASC; """ txn.execute(sql, (room_id, Membership.JOIN)) @@ -1037,37 +1058,70 @@ class RoomMemberWorkerStore(EventsWorkerStore): return True @cached(iterable=True, max_entries=10000) - async def get_current_hosts_in_room(self, room_id: str) -> Set[str]: - """Get current hosts in room based on current state.""" + async def get_current_hosts_in_room(self, room_id: str) -> List[str]: + """ + Get current hosts in room based on current state. + + The heuristic of sorting by servers who have been in the room the + longest is good because they're most likely to have anything we ask + about. + + Returns: + Returns a list of servers sorted by longest in the room first. (aka. + sorted by join with the lowest depth first). + """ # First we check if we already have `get_users_in_room` in the cache, as # we can just calculate result from that users = self.get_users_in_room.cache.get_immediate( (room_id,), None, update_metrics=False ) - if users is not None: - return {get_domain_from_id(u) for u in users} - - if isinstance(self.database_engine, Sqlite3Engine): + if users is None and isinstance(self.database_engine, Sqlite3Engine): # If we're using SQLite then let's just always use # `get_users_in_room` rather than funky SQL. users = await self.get_users_in_room(room_id) - return {get_domain_from_id(u) for u in users} + + if users is not None: + # Because `users` is sorted from lowest -> highest depth, the list + # of domains will also be sorted that way. + domains: List[str] = [] + # We use a `Set` just for fast lookups + domain_set: Set[str] = set() + for u in users: + domain = get_domain_from_id(u) + if domain not in domain_set: + domain_set.add(domain) + domains.append(domain) + return domains # For PostgreSQL we can use a regex to pull out the domains from the # joined users in `current_state_events` via regex. - def get_current_hosts_in_room_txn(txn: LoggingTransaction) -> Set[str]: + def get_current_hosts_in_room_txn(txn: LoggingTransaction) -> List[str]: + # Returns a list of servers currently joined in the room sorted by + # longest in the room first (aka. with the lowest depth). The + # heuristic of sorting by servers who have been in the room the + # longest is good because they're most likely to have anything we + # ask about. 
sql = """ - SELECT DISTINCT substring(state_key FROM '@[^:]*:(.*)$') - FROM current_state_events + SELECT + /* Match the domain part of the MXID */ + substring(c.state_key FROM '@[^:]*:(.*)$') as server_domain + FROM current_state_events c + /* Get the depth of the event from the events table */ + INNER JOIN events AS e USING (event_id) WHERE - type = 'm.room.member' - AND membership = 'join' - AND room_id = ? + /* Find any join state events in the room */ + c.type = 'm.room.member' + AND c.membership = 'join' + AND c.room_id = ? + /* Group all state events from the same domain into their own buckets (groups) */ + GROUP BY server_domain + /* Sorted by lowest depth first */ + ORDER BY min(e.depth) ASC; """ txn.execute(sql, (room_id,)) - return {d for d, in txn} + return [d for d, in txn] return await self.db_pool.runInteraction( "get_current_hosts_in_room", get_current_hosts_in_room_txn -- cgit 1.5.1 From 682dfcfc0db05d9c99b7615d950997535df4d533 Mon Sep 17 00:00:00 2001 From: Dirk Klimpel <5740567+dklimpel@users.noreply.github.com> Date: Tue, 30 Aug 2022 11:58:38 +0200 Subject: Fix that user cannot `/forget` rooms after the last member has left (#13546) --- changelog.d/13546.bugfix | 1 + synapse/handlers/room_member.py | 7 ++- tests/handlers/test_room_member.py | 93 +++++++++++++++++++++++++++++++++++++- tests/storage/test_roommember.py | 4 +- 4 files changed, 99 insertions(+), 6 deletions(-) create mode 100644 changelog.d/13546.bugfix (limited to 'synapse') diff --git a/changelog.d/13546.bugfix b/changelog.d/13546.bugfix new file mode 100644 index 0000000000..83bc3a61d2 --- /dev/null +++ b/changelog.d/13546.bugfix @@ -0,0 +1 @@ +Fix bug that user cannot `/forget` rooms after the last member has left the room. \ No newline at end of file diff --git a/synapse/handlers/room_member.py b/synapse/handlers/room_member.py index 709682622f..e726997d83 100644 --- a/synapse/handlers/room_member.py +++ b/synapse/handlers/room_member.py @@ -1925,8 +1925,11 @@ class RoomMemberMasterHandler(RoomMemberHandler): ]: raise SynapseError(400, "User %s in room %s" % (user_id, room_id)) - if membership: - await self.store.forget(user_id, room_id) + # In normal case this call is only required if `membership` is not `None`. + # But: After the last member had left the room, the background update + # `_background_remove_left_rooms` is deleting rows related to this room from + # the table `current_state_events` and `get_current_state_events` is `None`. 
+ await self.store.forget(user_id, room_id) def get_users_which_can_issue_invite(auth_events: StateMap[EventBase]) -> List[str]: diff --git a/tests/handlers/test_room_member.py b/tests/handlers/test_room_member.py index 1d13ed1e88..6bbfd5dc84 100644 --- a/tests/handlers/test_room_member.py +++ b/tests/handlers/test_room_member.py @@ -6,7 +6,7 @@ import synapse.rest.admin import synapse.rest.client.login import synapse.rest.client.room from synapse.api.constants import EventTypes, Membership -from synapse.api.errors import LimitExceededError +from synapse.api.errors import LimitExceededError, SynapseError from synapse.crypto.event_signing import add_hashes_and_signatures from synapse.events import FrozenEventV3 from synapse.federation.federation_client import SendJoinResult @@ -17,7 +17,11 @@ from synapse.util import Clock from tests.replication._base import BaseMultiWorkerStreamTestCase from tests.server import make_request from tests.test_utils import make_awaitable -from tests.unittest import FederatingHomeserverTestCase, override_config +from tests.unittest import ( + FederatingHomeserverTestCase, + HomeserverTestCase, + override_config, +) class TestJoinsLimitedByPerRoomRateLimiter(FederatingHomeserverTestCase): @@ -287,3 +291,88 @@ class TestReplicatedJoinsLimitedByPerRoomRateLimiter(BaseMultiWorkerStreamTestCa ), LimitExceededError, ) + + +class RoomMemberMasterHandlerTestCase(HomeserverTestCase): + servlets = [ + synapse.rest.admin.register_servlets, + synapse.rest.client.login.register_servlets, + synapse.rest.client.room.register_servlets, + ] + + def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None: + self.handler = hs.get_room_member_handler() + self.store = hs.get_datastores().main + + # Create two users. + self.alice = self.register_user("alice", "pass") + self.alice_ID = UserID.from_string(self.alice) + self.alice_token = self.login("alice", "pass") + self.bob = self.register_user("bob", "pass") + self.bob_ID = UserID.from_string(self.bob) + self.bob_token = self.login("bob", "pass") + + # Create a room on this homeserver. + self.room_id = self.helper.create_room_as(self.alice, tok=self.alice_token) + + def test_leave_and_forget(self) -> None: + """Tests that forget a room is successfully. 
The test is performed with two users, + as forgetting by the last user respectively after all users had left the + is a special edge case.""" + self.helper.join(self.room_id, user=self.bob, tok=self.bob_token) + + # alice is not the last room member that leaves and forgets the room + self.helper.leave(self.room_id, user=self.alice, tok=self.alice_token) + self.get_success(self.handler.forget(self.alice_ID, self.room_id)) + self.assertTrue( + self.get_success(self.store.did_forget(self.alice, self.room_id)) + ) + + # the server has not forgotten the room + self.assertFalse( + self.get_success(self.store.is_locally_forgotten_room(self.room_id)) + ) + + def test_leave_and_forget_last_user(self) -> None: + """Tests that forget a room is successfully when the last user has left the room.""" + + # alice is the last room member that leaves and forgets the room + self.helper.leave(self.room_id, user=self.alice, tok=self.alice_token) + self.get_success(self.handler.forget(self.alice_ID, self.room_id)) + self.assertTrue( + self.get_success(self.store.did_forget(self.alice, self.room_id)) + ) + + # the server has forgotten the room + self.assertTrue( + self.get_success(self.store.is_locally_forgotten_room(self.room_id)) + ) + + def test_forget_when_not_left(self) -> None: + """Tests that a user cannot not forgets a room that has not left.""" + self.get_failure(self.handler.forget(self.alice_ID, self.room_id), SynapseError) + + def test_rejoin_forgotten_by_user(self) -> None: + """Test that a user that has forgotten a room can do a re-join. + The room was not forgotten from the local server. + One local user is still member of the room.""" + self.helper.join(self.room_id, user=self.bob, tok=self.bob_token) + + self.helper.leave(self.room_id, user=self.alice, tok=self.alice_token) + self.get_success(self.handler.forget(self.alice_ID, self.room_id)) + self.assertTrue( + self.get_success(self.store.did_forget(self.alice, self.room_id)) + ) + + # the server has not forgotten the room + self.assertFalse( + self.get_success(self.store.is_locally_forgotten_room(self.room_id)) + ) + + self.helper.join(self.room_id, user=self.alice, tok=self.alice_token) + # TODO: A join to a room does not invalidate the forgotten cache + # see https://github.com/matrix-org/synapse/issues/13262 + self.store.did_forget.invalidate_all() + self.assertFalse( + self.get_success(self.store.did_forget(self.alice, self.room_id)) + ) diff --git a/tests/storage/test_roommember.py b/tests/storage/test_roommember.py index ceec690285..8794401823 100644 --- a/tests/storage/test_roommember.py +++ b/tests/storage/test_roommember.py @@ -158,7 +158,7 @@ class RoomMemberStoreTestCase(unittest.HomeserverTestCase): # Check that alice's display name is now None self.assertEqual(row[0]["display_name"], None) - def test_room_is_locally_forgotten(self): + def test_room_is_locally_forgotten(self) -> None: """Test that when the last local user has forgotten a room it is known as forgotten.""" # join two local and one remote user self.room = self.helper.create_room_as(self.u_alice, tok=self.t_alice) @@ -199,7 +199,7 @@ class RoomMemberStoreTestCase(unittest.HomeserverTestCase): self.get_success(self.store.is_locally_forgotten_room(self.room)) ) - def test_join_locally_forgotten_room(self): + def test_join_locally_forgotten_room(self) -> None: """Tests if a user joins a forgotten room the room is not forgotten anymore.""" self.room = self.helper.create_room_as(self.u_alice, tok=self.t_alice) self.assertFalse( -- cgit 1.5.1 From 
1eea73b4133da3afc2361592f7b2fa4a4125249d Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 30 Aug 2022 06:08:29 -0500 Subject: Fix rate limit metrics registering twice and misreporting (#13649) * Fix rate limit metrics registering twice and misreporting Fix https://github.com/matrix-org/synapse/issues/13641 * Fix lints * Add changelog * Document `metrics_name=None`. --- changelog.d/13649.bugfix | 1 + synapse/server.py | 4 +- synapse/util/ratelimitutils.py | 155 ++++++++++++++++++++++++++++++++--------- 3 files changed, 128 insertions(+), 32 deletions(-) create mode 100644 changelog.d/13649.bugfix (limited to 'synapse') diff --git a/changelog.d/13649.bugfix b/changelog.d/13649.bugfix new file mode 100644 index 0000000000..e6513a585a --- /dev/null +++ b/changelog.d/13649.bugfix @@ -0,0 +1 @@ +Fix rate limit gauge metrics registering twice and misreporting (`synapse_rate_limit_sleep_affected_hosts`, `synapse_rate_limit_reject_affected_hosts`). diff --git a/synapse/server.py b/synapse/server.py index 181984a1a4..c2e55bf0b1 100644 --- a/synapse/server.py +++ b/synapse/server.py @@ -756,7 +756,9 @@ class HomeServer(metaclass=abc.ABCMeta): @cache_in_self def get_federation_ratelimiter(self) -> FederationRateLimiter: return FederationRateLimiter( - self.get_clock(), config=self.config.ratelimiting.rc_federation + self.get_clock(), + config=self.config.ratelimiting.rc_federation, + metrics_name="federation_servlets", ) @cache_in_self diff --git a/synapse/util/ratelimitutils.py b/synapse/util/ratelimitutils.py index f678b52cb4..9f64fed0d7 100644 --- a/synapse/util/ratelimitutils.py +++ b/synapse/util/ratelimitutils.py @@ -15,10 +15,23 @@ import collections import contextlib import logging +import threading import typing -from typing import Any, DefaultDict, Iterator, List, Set +from typing import ( + Any, + Callable, + DefaultDict, + Dict, + Iterator, + List, + Mapping, + Optional, + Set, + Tuple, +) from prometheus_client.core import Counter +from typing_extensions import ContextManager from twisted.internet import defer @@ -40,12 +53,20 @@ logger = logging.getLogger(__name__) # Track how much the ratelimiter is affecting requests -rate_limit_sleep_counter = Counter("synapse_rate_limit_sleep", "") -rate_limit_reject_counter = Counter("synapse_rate_limit_reject", "") +rate_limit_sleep_counter = Counter( + "synapse_rate_limit_sleep", + "Number of requests slept by the rate limiter", + ["rate_limiter_name"], +) +rate_limit_reject_counter = Counter( + "synapse_rate_limit_reject", + "Number of requests rejected by the rate limiter", + ["rate_limiter_name"], +) queue_wait_timer = Histogram( "synapse_rate_limit_queue_wait_time_seconds", - "sec", - [], + "Amount of time spent waiting for the rate limiter to let our request through.", + ["rate_limiter_name"], buckets=( 0.005, 0.01, @@ -65,35 +86,92 @@ queue_wait_timer = Histogram( ) +_rate_limiter_instances: Set["FederationRateLimiter"] = set() +# Protects the _rate_limiter_instances set from concurrent access +_rate_limiter_instances_lock = threading.Lock() + + +def _get_counts_from_rate_limiter_instance( + count_func: Callable[["FederationRateLimiter"], int] +) -> Mapping[Tuple[str, ...], int]: + """Returns a count of something (slept/rejected hosts) by (metrics_name)""" + # Cast to a list to prevent it changing while the Prometheus + # thread is collecting metrics + with _rate_limiter_instances_lock: + rate_limiter_instances = list(_rate_limiter_instances) + + # Map from (metrics_name,) -> int, the number of something like slept hosts + # or 
rejected hosts. The key type is Tuple[str], but we leave the length + # unspecified for compatability with LaterGauge's annotations. + counts: Dict[Tuple[str, ...], int] = {} + for rate_limiter_instance in rate_limiter_instances: + # Only track metrics if they provided a `metrics_name` to + # differentiate this instance of the rate limiter. + if rate_limiter_instance.metrics_name: + key = (rate_limiter_instance.metrics_name,) + counts[key] = count_func(rate_limiter_instance) + + return counts + + +# We track the number of affected hosts per time-period so we can +# differentiate one really noisy homeserver from a general +# ratelimit tuning problem across the federation. +LaterGauge( + "synapse_rate_limit_sleep_affected_hosts", + "Number of hosts that had requests put to sleep", + ["rate_limiter_name"], + lambda: _get_counts_from_rate_limiter_instance( + lambda rate_limiter_instance: sum( + ratelimiter.should_sleep() + for ratelimiter in rate_limiter_instance.ratelimiters.values() + ) + ), +) +LaterGauge( + "synapse_rate_limit_reject_affected_hosts", + "Number of hosts that had requests rejected", + ["rate_limiter_name"], + lambda: _get_counts_from_rate_limiter_instance( + lambda rate_limiter_instance: sum( + ratelimiter.should_reject() + for ratelimiter in rate_limiter_instance.ratelimiters.values() + ) + ), +) + + class FederationRateLimiter: - def __init__(self, clock: Clock, config: FederationRatelimitSettings): + """Used to rate limit request per-host.""" + + def __init__( + self, + clock: Clock, + config: FederationRatelimitSettings, + metrics_name: Optional[str] = None, + ): + """ + Args: + clock + config + metrics_name: The name of the rate limiter so we can differentiate it + from the rest in the metrics. If `None`, we don't track metrics + for this rate limiter. + + """ + self.metrics_name = metrics_name + def new_limiter() -> "_PerHostRatelimiter": - return _PerHostRatelimiter(clock=clock, config=config) + return _PerHostRatelimiter( + clock=clock, config=config, metrics_name=metrics_name + ) self.ratelimiters: DefaultDict[ str, "_PerHostRatelimiter" ] = collections.defaultdict(new_limiter) - # We track the number of affected hosts per time-period so we can - # differentiate one really noisy homeserver from a general - # ratelimit tuning problem across the federation. - LaterGauge( - "synapse_rate_limit_sleep_affected_hosts", - "Number of hosts that had requests put to sleep", - [], - lambda: sum( - ratelimiter.should_sleep() for ratelimiter in self.ratelimiters.values() - ), - ) - LaterGauge( - "synapse_rate_limit_reject_affected_hosts", - "Number of hosts that had requests rejected", - [], - lambda: sum( - ratelimiter.should_reject() - for ratelimiter in self.ratelimiters.values() - ), - ) + with _rate_limiter_instances_lock: + _rate_limiter_instances.add(self) def ratelimit(self, host: str) -> "_GeneratorContextManager[defer.Deferred[None]]": """Used to ratelimit an incoming request from a given host @@ -114,13 +192,23 @@ class FederationRateLimiter: class _PerHostRatelimiter: - def __init__(self, clock: Clock, config: FederationRatelimitSettings): + def __init__( + self, + clock: Clock, + config: FederationRatelimitSettings, + metrics_name: Optional[str] = None, + ): """ Args: clock config + metrics_name: The name of the rate limiter so we can differentiate it + from the rest in the metrics. If `None`, we don't track metrics + for this rate limiter. 
+ from the rest in the metrics """ self.clock = clock + self.metrics_name = metrics_name self.window_size = config.window_size self.sleep_limit = config.sleep_limit @@ -178,7 +266,10 @@ class _PerHostRatelimiter: return len(self.request_times) > self.sleep_limit async def _on_enter_with_tracing(self, request_id: object) -> None: - with start_active_span("ratelimit wait"), queue_wait_timer.time(): + maybe_metrics_cm: ContextManager = contextlib.nullcontext() + if self.metrics_name: + maybe_metrics_cm = queue_wait_timer.labels(self.metrics_name).time() + with start_active_span("ratelimit wait"), maybe_metrics_cm: await self._on_enter(request_id) def _on_enter(self, request_id: object) -> "defer.Deferred[None]": @@ -193,7 +284,8 @@ class _PerHostRatelimiter: # sleeping or in the ready queue). if self.should_reject(): logger.debug("Ratelimiter(%s): rejecting request", self.host) - rate_limit_reject_counter.inc() + if self.metrics_name: + rate_limit_reject_counter.labels(self.metrics_name).inc() raise LimitExceededError( retry_after_ms=int(self.window_size / self.sleep_limit) ) @@ -228,7 +320,8 @@ class _PerHostRatelimiter: id(request_id), self.sleep_sec, ) - rate_limit_sleep_counter.inc() + if self.metrics_name: + rate_limit_sleep_counter.labels(self.metrics_name).inc() ret_defer = run_in_background(self.clock.sleep, self.sleep_sec) self.sleeping_requests.add(request_id) -- cgit 1.5.1 From 20df96a7a7e3d676b0beae12d0eeb1f1d668247e Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Tue, 30 Aug 2022 07:12:48 -0400 Subject: Speed up inserting `event_push_actions_staging`. (#13634) By using `execute_values` instead of `execute_batch`. --- changelog.d/13634.feature | 1 + .../storage/databases/main/event_push_actions.py | 28 +++++++--------------- 2 files changed, 9 insertions(+), 20 deletions(-) create mode 100644 changelog.d/13634.feature (limited to 'synapse') diff --git a/changelog.d/13634.feature b/changelog.d/13634.feature new file mode 100644 index 0000000000..0a8827205d --- /dev/null +++ b/changelog.d/13634.feature @@ -0,0 +1 @@ +Improve performance of sending messages in rooms with thousands of local users. diff --git a/synapse/storage/databases/main/event_push_actions.py b/synapse/storage/databases/main/event_push_actions.py index 8dfa545c27..9f410d69de 100644 --- a/synapse/storage/databases/main/event_push_actions.py +++ b/synapse/storage/databases/main/event_push_actions.py @@ -700,26 +700,14 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas int(count_as_unread), # unread column ) - def _add_push_actions_to_staging_txn(txn: LoggingTransaction) -> None: - # We don't use simple_insert_many here to avoid the overhead - # of generating lists of dicts. - - sql = """ - INSERT INTO event_push_actions_staging - (event_id, user_id, actions, notif, highlight, unread) - VALUES (?, ?, ?, ?, ?, ?) 
- """ - - txn.execute_batch( - sql, - ( - _gen_entry(user_id, actions) - for user_id, actions in user_id_actions.items() - ), - ) - - return await self.db_pool.runInteraction( - "add_push_actions_to_staging", _add_push_actions_to_staging_txn + await self.db_pool.simple_insert_many( + "event_push_actions_staging", + keys=("event_id", "user_id", "actions", "notif", "highlight", "unread"), + values=[ + _gen_entry(user_id, actions) + for user_id, actions in user_id_actions.items() + ], + desc="add_push_actions_to_staging", ) async def remove_push_actions_from_staging(self, event_id: str) -> None: -- cgit 1.5.1 From 303b40b988bc372a81fc1a3cf62d3f5d074970ff Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Tue, 30 Aug 2022 07:15:54 -0400 Subject: Do not wait for background updates to complete do expire URL cache. (#13657) Media downloaded as part of a URL preview is normally deleted after two days. However, while a background database migration is running, the process is stopped. A long-running database migration can therefore cause the media store to fill up with old preview files. This logic was added in #2697 to make sure that we didn't try to run the expiry without an index on `local_media_repository.created_ts`; the original logic that needs that index was added in #2478 (in `get_url_cache_media_before`, as amended by 93247a424a5068b088567fa98b6990e47608b7cb), and is still present. Given that the background update was added before Synapse v1.0.0, just drop this check and assume the index exists. --- changelog.d/13657.bugfix | 1 + synapse/rest/media/v1/preview_url_resource.py | 4 ---- 2 files changed, 1 insertion(+), 4 deletions(-) create mode 100644 changelog.d/13657.bugfix (limited to 'synapse') diff --git a/changelog.d/13657.bugfix b/changelog.d/13657.bugfix new file mode 100644 index 0000000000..d314d9c52f --- /dev/null +++ b/changelog.d/13657.bugfix @@ -0,0 +1 @@ +Fix a long-standing bug that downloaded media for URL previews was not deleted while database background updates were running. diff --git a/synapse/rest/media/v1/preview_url_resource.py b/synapse/rest/media/v1/preview_url_resource.py index b36c98a08e..a8f6fd6b35 100644 --- a/synapse/rest/media/v1/preview_url_resource.py +++ b/synapse/rest/media/v1/preview_url_resource.py @@ -732,10 +732,6 @@ class PreviewUrlResource(DirectServeJsonResource): logger.debug("Running url preview cache expiry") - if not (await self.store.db_pool.updates.has_completed_background_updates()): - logger.debug("Still running DB updates; skipping url preview cache expiry") - return - def try_remove_parent_dirs(dirs: Iterable[str]) -> None: """Attempt to remove the given chain of parent directories -- cgit 1.5.1 From 1c26acd815a8609314991e539dd99ceb2b9b1b43 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 30 Aug 2022 12:17:48 +0100 Subject: Fix bug where we wedge media plugins if clients disconnect early (#13660) We incorrectly didn't use the returned `Responder` if the client had disconnected, which meant that the resource used by the Responder wasn't correctly released. In particular, this exhausted the thread pools so that *all* requests timed out. 
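The shape of the fix is easier to see in isolation. The sketch below is a simplified,
hypothetical illustration (the `BufferResponder` class is invented for this example and is
not Synapse's `Responder`): the responder is always entered as a context manager, so its
underlying resource is released even when we bail out early because the client has gone away.

```python
# Hedged sketch of the "always enter the responder" pattern; not the actual
# Synapse media code.
import io


class BufferResponder:
    """Stand-in for a media responder that owns a resource (here, a buffer)."""

    def __init__(self, data: bytes) -> None:
        self.buffer = io.BytesIO(data)

    def __enter__(self) -> "BufferResponder":
        return self

    def __exit__(self, *exc: object) -> None:
        # Runs on every exit path, so the resource is never leaked.
        self.buffer.close()


def respond(responder: BufferResponder, disconnected: bool) -> None:
    # Entering the context manager *before* the disconnect check is the point
    # of the fix: the early return below can no longer leak the resource.
    with responder:
        if disconnected:
            return
        # ... stream responder.buffer out to the client here ...


respond(BufferResponder(b"media bytes"), disconnected=True)
```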
--- changelog.d/13660.bugfix | 1 + synapse/rest/media/v1/_base.py | 40 +++++++++++++++++++++------------------- 2 files changed, 22 insertions(+), 19 deletions(-) create mode 100644 changelog.d/13660.bugfix (limited to 'synapse') diff --git a/changelog.d/13660.bugfix b/changelog.d/13660.bugfix new file mode 100644 index 0000000000..43859a4d65 --- /dev/null +++ b/changelog.d/13660.bugfix @@ -0,0 +1 @@ +Fix bug where we wedge media plugins if clients disconnect early. Introduced in v1.22.0. diff --git a/synapse/rest/media/v1/_base.py b/synapse/rest/media/v1/_base.py index c35d42fab8..d30878f704 100644 --- a/synapse/rest/media/v1/_base.py +++ b/synapse/rest/media/v1/_base.py @@ -254,30 +254,32 @@ async def respond_with_responder( file_size: Size in bytes of the media. If not known it should be None upload_name: The name of the requested file, if any. """ - if request._disconnected: - logger.warning( - "Not sending response to request %s, already disconnected.", request - ) - return - if not responder: respond_404(request) return - logger.debug("Responding to media request with responder %s", responder) - add_file_headers(request, media_type, file_size, upload_name) - try: - with responder: + # If we have a responder we *must* use it as a context manager. + with responder: + if request._disconnected: + logger.warning( + "Not sending response to request %s, already disconnected.", request + ) + return + + logger.debug("Responding to media request with responder %s", responder) + add_file_headers(request, media_type, file_size, upload_name) + try: + await responder.write_to_consumer(request) - except Exception as e: - # The majority of the time this will be due to the client having gone - # away. Unfortunately, Twisted simply throws a generic exception at us - # in that case. - logger.warning("Failed to write to consumer: %s %s", type(e), e) - - # Unregister the producer, if it has one, so Twisted doesn't complain - if request.producer: - request.unregisterProducer() + except Exception as e: + # The majority of the time this will be due to the client having gone + # away. Unfortunately, Twisted simply throws a generic exception at us + # in that case. + logger.warning("Failed to write to consumer: %s %s", type(e), e) + + # Unregister the producer, if it has one, so Twisted doesn't complain + if request.producer: + request.unregisterProducer() finish_request(request) -- cgit 1.5.1 From 20c76cecb9eb84dadfa7b2d25b436d3ab9218a1a Mon Sep 17 00:00:00 2001 From: Shay Date: Tue, 30 Aug 2022 10:29:16 -0700 Subject: Drop unused column `application_services_state.last_txn` (#13627) --- changelog.d/13627.misc | 1 + synapse/storage/schema/__init__.py | 1 + ...pplication_services_state_last_txn.sql.postgres | 17 +++++++++ ..._application_services_state_last_txn.sql.sqlite | 40 ++++++++++++++++++++++ 4 files changed, 59 insertions(+) create mode 100644 changelog.d/13627.misc create mode 100644 synapse/storage/schema/main/delta/72/04drop_column_application_services_state_last_txn.sql.postgres create mode 100644 synapse/storage/schema/main/delta/72/04drop_column_application_services_state_last_txn.sql.sqlite (limited to 'synapse') diff --git a/changelog.d/13627.misc b/changelog.d/13627.misc new file mode 100644 index 0000000000..1a15709aff --- /dev/null +++ b/changelog.d/13627.misc @@ -0,0 +1 @@ +Drop unused column `application_services_state.last_txn`. 
diff --git a/synapse/storage/schema/__init__.py b/synapse/storage/schema/__init__.py index a9a88c8bfd..256f745dc0 100644 --- a/synapse/storage/schema/__init__.py +++ b/synapse/storage/schema/__init__.py @@ -75,6 +75,7 @@ Changes in SCHEMA_VERSION = 71: Changes in SCHEMA_VERSION = 72: - event_edges.(room_id, is_state) are no longer written to. - Tables related to groups are dropped. + - Unused column application_services_state.last_txn is dropped """ diff --git a/synapse/storage/schema/main/delta/72/04drop_column_application_services_state_last_txn.sql.postgres b/synapse/storage/schema/main/delta/72/04drop_column_application_services_state_last_txn.sql.postgres new file mode 100644 index 0000000000..13d47de9e6 --- /dev/null +++ b/synapse/storage/schema/main/delta/72/04drop_column_application_services_state_last_txn.sql.postgres @@ -0,0 +1,17 @@ +/* Copyright 2022 The Matrix.org Foundation C.I.C + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +-- Drop unused column application_services_state.last_txn +ALTER table application_services_state DROP COLUMN last_txn; \ No newline at end of file diff --git a/synapse/storage/schema/main/delta/72/04drop_column_application_services_state_last_txn.sql.sqlite b/synapse/storage/schema/main/delta/72/04drop_column_application_services_state_last_txn.sql.sqlite new file mode 100644 index 0000000000..3be1c88d72 --- /dev/null +++ b/synapse/storage/schema/main/delta/72/04drop_column_application_services_state_last_txn.sql.sqlite @@ -0,0 +1,40 @@ +/* Copyright 2022 The Matrix.org Foundation C.I.C + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +-- Drop unused column application_services_state.last_txn + +CREATE TABLE application_services_state2 ( + as_id TEXT PRIMARY KEY NOT NULL, + state VARCHAR(5), + read_receipt_stream_id BIGINT, + presence_stream_id BIGINT, + to_device_stream_id BIGINT, + device_list_stream_id BIGINT +); + + +INSERT INTO application_services_state2 ( + as_id, + state, + read_receipt_stream_id, + presence_stream_id, + to_device_stream_id, + device_list_stream_id +) +SELECT as_id, state, read_receipt_stream_id, presence_stream_id, to_device_stream_id, device_list_stream_id +FROM application_services_state; + +DROP TABLE application_services_state; +ALTER TABLE application_services_state2 RENAME TO application_services_state; -- cgit 1.5.1 From 92c5817e34cb421dbe2c0df275238e84866932a8 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 30 Aug 2022 14:50:06 -0500 Subject: Give the correct next event when the message timestamps are the same - MSC3030 (#13658) Discovered while working on https://github.com/matrix-org/synapse/pull/13589 and I had all the messages at the same timestamp in the tests. Part of https://github.com/matrix-org/matrix-spec-proposals/pull/3030 Complement tests: https://github.com/matrix-org/complement/pull/457 --- changelog.d/13658.bugfix | 1 + synapse/storage/databases/main/events_worker.py | 12 ++++++++++-- 2 files changed, 11 insertions(+), 2 deletions(-) create mode 100644 changelog.d/13658.bugfix (limited to 'synapse') diff --git a/changelog.d/13658.bugfix b/changelog.d/13658.bugfix new file mode 100644 index 0000000000..8740f066bb --- /dev/null +++ b/changelog.d/13658.bugfix @@ -0,0 +1 @@ +Fix MSC3030 `/timestamp_to_event` endpoint to return the correct next event when the events have the same timestamp. diff --git a/synapse/storage/databases/main/events_worker.py b/synapse/storage/databases/main/events_worker.py index 8a7cdb024d..9b997c304d 100644 --- a/synapse/storage/databases/main/events_worker.py +++ b/synapse/storage/databases/main/events_worker.py @@ -2111,7 +2111,14 @@ class EventsWorkerStore(SQLBaseStore): AND room_id = ? /* Make sure event is not rejected */ AND rejections.event_id IS NULL - ORDER BY origin_server_ts %s + /** + * First sort by the message timestamp. If the message timestamps are the + * same, we want the message that logically comes "next" (before/after + * the given timestamp) based on the DAG and its topological order (`depth`). + * Finally, we can tie-break based on when it was received on the server + * (`stream_ordering`). 
+ */ + ORDER BY origin_server_ts %s, depth %s, stream_ordering %s LIMIT 1; """ @@ -2130,7 +2137,8 @@ class EventsWorkerStore(SQLBaseStore): order = "ASC" txn.execute( - sql_template % (comparison_operator, order), (timestamp, room_id) + sql_template % (comparison_operator, order, order, order), + (timestamp, room_id), ) row = txn.fetchone() if row: -- cgit 1.5.1 From a160406d245cb84b48fa67fe3ee73f0cffceb495 Mon Sep 17 00:00:00 2001 From: David Robertson Date: Wed, 31 Aug 2022 11:38:16 +0100 Subject: Fix admin List Room API return type on sqlite (#13509) --- changelog.d/13509.bugfix | 1 + synapse/storage/databases/main/room.py | 6 ++++-- tests/rest/admin/test_room.py | 19 ++++++++++++++----- 3 files changed, 19 insertions(+), 7 deletions(-) create mode 100644 changelog.d/13509.bugfix (limited to 'synapse') diff --git a/changelog.d/13509.bugfix b/changelog.d/13509.bugfix new file mode 100644 index 0000000000..6dcb9741d9 --- /dev/null +++ b/changelog.d/13509.bugfix @@ -0,0 +1 @@ +Fix a bug introduced in Synapse 1.13 where the [List Rooms admin API](https://matrix-org.github.io/synapse/develop/admin_api/rooms.html#list-room-api) would return integers instead of booleans for the `federatable` and `public` fields when using a Sqlite database. diff --git a/synapse/storage/databases/main/room.py b/synapse/storage/databases/main/room.py index b7d4baa6bb..367424b4a8 100644 --- a/synapse/storage/databases/main/room.py +++ b/synapse/storage/databases/main/room.py @@ -641,8 +641,10 @@ class RoomWorkerStore(CacheInvalidationWorkerStore): "version": room[5], "creator": room[6], "encryption": room[7], - "federatable": room[8], - "public": room[9], + # room_stats_state.federatable is an integer on sqlite. + "federatable": bool(room[8]), + # rooms.is_public is an integer on sqlite. 
+ "public": bool(room[9]), "join_rules": room[10], "guest_access": room[11], "history_visibility": room[12], diff --git a/tests/rest/admin/test_room.py b/tests/rest/admin/test_room.py index fd6da557c1..9d71a97524 100644 --- a/tests/rest/admin/test_room.py +++ b/tests/rest/admin/test_room.py @@ -1080,7 +1080,9 @@ class RoomTestCase(unittest.HomeserverTestCase): room_ids = [] for _ in range(total_rooms): room_id = self.helper.create_room_as( - self.admin_user, tok=self.admin_user_tok + self.admin_user, + tok=self.admin_user_tok, + is_public=True, ) room_ids.append(room_id) @@ -1119,8 +1121,8 @@ class RoomTestCase(unittest.HomeserverTestCase): self.assertIn("version", r) self.assertIn("creator", r) self.assertIn("encryption", r) - self.assertIn("federatable", r) - self.assertIn("public", r) + self.assertIs(r["federatable"], True) + self.assertIs(r["public"], True) self.assertIn("join_rules", r) self.assertIn("guest_access", r) self.assertIn("history_visibility", r) @@ -1587,8 +1589,12 @@ class RoomTestCase(unittest.HomeserverTestCase): def test_single_room(self) -> None: """Test that a single room can be requested correctly""" # Create two test rooms - room_id_1 = self.helper.create_room_as(self.admin_user, tok=self.admin_user_tok) - room_id_2 = self.helper.create_room_as(self.admin_user, tok=self.admin_user_tok) + room_id_1 = self.helper.create_room_as( + self.admin_user, tok=self.admin_user_tok, is_public=True + ) + room_id_2 = self.helper.create_room_as( + self.admin_user, tok=self.admin_user_tok, is_public=False + ) room_name_1 = "something" room_name_2 = "else" @@ -1634,7 +1640,10 @@ class RoomTestCase(unittest.HomeserverTestCase): self.assertIn("state_events", channel.json_body) self.assertIn("room_type", channel.json_body) self.assertIn("forgotten", channel.json_body) + self.assertEqual(room_id_1, channel.json_body["room_id"]) + self.assertIs(True, channel.json_body["federatable"]) + self.assertIs(True, channel.json_body["public"]) def test_single_room_devices(self) -> None: """Test that `joined_local_devices` can be requested correctly""" -- cgit 1.5.1 From 7bc110a19e6de0572b0c9513726d13298b45ced2 Mon Sep 17 00:00:00 2001 From: reivilibre Date: Wed, 31 Aug 2022 11:16:05 +0000 Subject: Generalise the `@cancellable` annotation so it can be used on functions other than just servlet methods. (#13662) --- changelog.d/13662.misc | 1 + synapse/federation/transport/server/_base.py | 5 +- synapse/http/server.py | 68 ++----------------------- synapse/replication/http/_base.py | 7 +-- synapse/rest/client/room.py | 3 +- synapse/util/cancellation.py | 56 ++++++++++++++++++++ tests/federation/transport/server/test__base.py | 3 +- tests/http/test_servlet.py | 2 +- tests/replication/http/test__base.py | 3 +- tests/test_server.py | 2 +- 10 files changed, 75 insertions(+), 75 deletions(-) create mode 100644 changelog.d/13662.misc create mode 100644 synapse/util/cancellation.py (limited to 'synapse') diff --git a/changelog.d/13662.misc b/changelog.d/13662.misc new file mode 100644 index 0000000000..3dea4a1c2c --- /dev/null +++ b/changelog.d/13662.misc @@ -0,0 +1 @@ +Generalise the `@cancellable` annotation so it can be used on functions other than just servlet methods. 
\ No newline at end of file diff --git a/synapse/federation/transport/server/_base.py b/synapse/federation/transport/server/_base.py index bb0f8d6b7b..1db8009d6c 100644 --- a/synapse/federation/transport/server/_base.py +++ b/synapse/federation/transport/server/_base.py @@ -21,7 +21,7 @@ from typing import TYPE_CHECKING, Any, Awaitable, Callable, Dict, Optional, Tupl from synapse.api.errors import Codes, FederationDeniedError, SynapseError from synapse.api.urls import FEDERATION_V1_PREFIX -from synapse.http.server import HttpServer, ServletCallback, is_method_cancellable +from synapse.http.server import HttpServer, ServletCallback from synapse.http.servlet import parse_json_object_from_request from synapse.http.site import SynapseRequest from synapse.logging.context import run_in_background @@ -34,6 +34,7 @@ from synapse.logging.opentracing import ( whitelisted_homeserver, ) from synapse.types import JsonDict +from synapse.util.cancellation import is_function_cancellable from synapse.util.ratelimitutils import FederationRateLimiter from synapse.util.stringutils import parse_and_validate_server_name @@ -375,7 +376,7 @@ class BaseFederationServlet: if code is None: continue - if is_method_cancellable(code): + if is_function_cancellable(code): # The wrapper added by `self._wrap` will inherit the cancellable flag, # but the wrapper itself does not support cancellation yet. # Once resolved, the cancellation tests in diff --git a/synapse/http/server.py b/synapse/http/server.py index 19f42159b8..6068a94b40 100644 --- a/synapse/http/server.py +++ b/synapse/http/server.py @@ -33,7 +33,6 @@ from typing import ( Optional, Pattern, Tuple, - TypeVar, Union, ) @@ -64,6 +63,7 @@ from synapse.logging.context import defer_to_thread, preserve_fn, run_in_backgro from synapse.logging.opentracing import active_span, start_active_span, trace_servlet from synapse.util import json_encoder from synapse.util.caches import intern_dict +from synapse.util.cancellation import is_function_cancellable from synapse.util.iterutils import chunk_seq if TYPE_CHECKING: @@ -94,68 +94,6 @@ HTML_ERROR_TEMPLATE = """ HTTP_STATUS_REQUEST_CANCELLED = 499 -F = TypeVar("F", bound=Callable[..., Any]) - - -_cancellable_method_names = frozenset( - { - # `RestServlet`, `BaseFederationServlet` and `BaseFederationServerServlet` - # methods - "on_GET", - "on_PUT", - "on_POST", - "on_DELETE", - # `_AsyncResource`, `DirectServeHtmlResource` and `DirectServeJsonResource` - # methods - "_async_render_GET", - "_async_render_PUT", - "_async_render_POST", - "_async_render_DELETE", - "_async_render_OPTIONS", - # `ReplicationEndpoint` methods - "_handle_request", - } -) - - -def cancellable(method: F) -> F: - """Marks a servlet method as cancellable. - - Methods with this decorator will be cancelled if the client disconnects before we - finish processing the request. - - During cancellation, `Deferred.cancel()` will be invoked on the `Deferred` wrapping - the method. The `cancel()` call will propagate down to the `Deferred` that is - currently being waited on. That `Deferred` will raise a `CancelledError`, which will - propagate up, as per normal exception handling. - - Before applying this decorator to a new endpoint, you MUST recursively check - that all `await`s in the function are on `async` functions or `Deferred`s that - handle cancellation cleanly, otherwise a variety of bugs may occur, ranging from - premature logging context closure, to stuck requests, to database corruption. 
- - Usage: - class SomeServlet(RestServlet): - @cancellable - async def on_GET(self, request: SynapseRequest) -> ...: - ... - """ - if method.__name__ not in _cancellable_method_names and not any( - method.__name__.startswith(prefix) for prefix in _cancellable_method_names - ): - raise ValueError( - "@cancellable decorator can only be applied to servlet methods." - ) - - method.cancellable = True # type: ignore[attr-defined] - return method - - -def is_method_cancellable(method: Callable[..., Any]) -> bool: - """Checks whether a servlet method has the `@cancellable` flag.""" - return getattr(method, "cancellable", False) - - def return_json_error( f: failure.Failure, request: SynapseRequest, config: Optional[HomeServerConfig] ) -> None: @@ -389,7 +327,7 @@ class _AsyncResource(resource.Resource, metaclass=abc.ABCMeta): method_handler = getattr(self, "_async_render_%s" % (request_method,), None) if method_handler: - request.is_render_cancellable = is_method_cancellable(method_handler) + request.is_render_cancellable = is_function_cancellable(method_handler) raw_callback_return = method_handler(request) @@ -551,7 +489,7 @@ class JsonResource(DirectServeJsonResource): async def _async_render(self, request: SynapseRequest) -> Tuple[int, Any]: callback, servlet_classname, group_dict = self._get_handler_for_request(request) - request.is_render_cancellable = is_method_cancellable(callback) + request.is_render_cancellable = is_function_cancellable(callback) # Make sure we have an appropriate name for this handler in prometheus # (rather than the default of JsonResource). diff --git a/synapse/replication/http/_base.py b/synapse/replication/http/_base.py index 561ad5bf04..acb0bd18f7 100644 --- a/synapse/replication/http/_base.py +++ b/synapse/replication/http/_base.py @@ -26,12 +26,13 @@ from twisted.web.server import Request from synapse.api.errors import HttpResponseException, SynapseError from synapse.http import RequestTimedOutError -from synapse.http.server import HttpServer, is_method_cancellable +from synapse.http.server import HttpServer from synapse.http.site import SynapseRequest from synapse.logging import opentracing from synapse.logging.opentracing import trace_with_opname from synapse.types import JsonDict from synapse.util.caches.response_cache import ResponseCache +from synapse.util.cancellation import is_function_cancellable from synapse.util.stringutils import random_string if TYPE_CHECKING: @@ -311,7 +312,7 @@ class ReplicationEndpoint(metaclass=abc.ABCMeta): url_args = list(self.PATH_ARGS) method = self.METHOD - if self.CACHE and is_method_cancellable(self._handle_request): + if self.CACHE and is_function_cancellable(self._handle_request): raise Exception( f"{self.__class__.__name__} has been marked as cancellable, but CACHE " "is set. The cancellable flag would have no effect." @@ -359,6 +360,6 @@ class ReplicationEndpoint(metaclass=abc.ABCMeta): # The `@cancellable` decorator may be applied to `_handle_request`. But we # told `HttpServer.register_paths` that our handler is `_check_auth_and_handle`, # so we have to set up the cancellable flag ourselves. 
- request.is_render_cancellable = is_method_cancellable(self._handle_request) + request.is_render_cancellable = is_function_cancellable(self._handle_request) return await self._handle_request(request, **kwargs) diff --git a/synapse/rest/client/room.py b/synapse/rest/client/room.py index 3259de4802..0e2834008e 100644 --- a/synapse/rest/client/room.py +++ b/synapse/rest/client/room.py @@ -37,7 +37,7 @@ from synapse.api.errors import ( ) from synapse.api.filtering import Filter from synapse.events.utils import format_event_for_client_v2 -from synapse.http.server import HttpServer, cancellable +from synapse.http.server import HttpServer from synapse.http.servlet import ( ResolveRoomIdMixin, RestServlet, @@ -57,6 +57,7 @@ from synapse.storage.state import StateFilter from synapse.streams.config import PaginationConfig from synapse.types import JsonDict, StreamToken, ThirdPartyInstanceID, UserID from synapse.util import json_decoder +from synapse.util.cancellation import cancellable from synapse.util.stringutils import parse_and_validate_server_name, random_string if TYPE_CHECKING: diff --git a/synapse/util/cancellation.py b/synapse/util/cancellation.py new file mode 100644 index 0000000000..472d2e3aeb --- /dev/null +++ b/synapse/util/cancellation.py @@ -0,0 +1,56 @@ +# Copyright 2022 The Matrix.org Foundation C.I.C. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import Any, Callable, TypeVar + +F = TypeVar("F", bound=Callable[..., Any]) + + +def cancellable(function: F) -> F: + """Marks a function as cancellable. + + Servlet methods with this decorator will be cancelled if the client disconnects before we + finish processing the request. + + Although this annotation is particularly useful for servlet methods, it's also + useful for intermediate functions, where it documents the fact that the function has + been audited for cancellation safety and needs to preserve that. + This then simplifies auditing new functions that call those same intermediate + functions. + + During cancellation, `Deferred.cancel()` will be invoked on the `Deferred` wrapping + the method. The `cancel()` call will propagate down to the `Deferred` that is + currently being waited on. That `Deferred` will raise a `CancelledError`, which will + propagate up, as per normal exception handling. + + Before applying this decorator to a new function, you MUST recursively check + that all `await`s in the function are on `async` functions or `Deferred`s that + handle cancellation cleanly, otherwise a variety of bugs may occur, ranging from + premature logging context closure, to stuck requests, to database corruption. + + See the documentation page on Cancellation for more information. + + Usage: + class SomeServlet(RestServlet): + @cancellable + async def on_GET(self, request: SynapseRequest) -> ...: + ... 
+ """ + + function.cancellable = True # type: ignore[attr-defined] + return function + + +def is_function_cancellable(function: Callable[..., Any]) -> bool: + """Checks whether a servlet method has the `@cancellable` flag.""" + return getattr(function, "cancellable", False) diff --git a/tests/federation/transport/server/test__base.py b/tests/federation/transport/server/test__base.py index d33e86db4c..e88e5d8bb3 100644 --- a/tests/federation/transport/server/test__base.py +++ b/tests/federation/transport/server/test__base.py @@ -18,9 +18,10 @@ from typing import Dict, List, Tuple from synapse.api.errors import Codes from synapse.federation.transport.server import BaseFederationServlet from synapse.federation.transport.server._base import Authenticator, _parse_auth_header -from synapse.http.server import JsonResource, cancellable +from synapse.http.server import JsonResource from synapse.server import HomeServer from synapse.types import JsonDict +from synapse.util.cancellation import cancellable from synapse.util.ratelimitutils import FederationRateLimiter from tests import unittest diff --git a/tests/http/test_servlet.py b/tests/http/test_servlet.py index bb966c80c6..3cbca0f5a3 100644 --- a/tests/http/test_servlet.py +++ b/tests/http/test_servlet.py @@ -18,7 +18,6 @@ from typing import Tuple from unittest.mock import Mock from synapse.api.errors import Codes, SynapseError -from synapse.http.server import cancellable from synapse.http.servlet import ( RestServlet, parse_json_object_from_request, @@ -28,6 +27,7 @@ from synapse.http.site import SynapseRequest from synapse.rest.client._base import client_patterns from synapse.server import HomeServer from synapse.types import JsonDict +from synapse.util.cancellation import cancellable from tests import unittest from tests.http.server._base import test_disconnect diff --git a/tests/replication/http/test__base.py b/tests/replication/http/test__base.py index 822a957c3a..936ab4504a 100644 --- a/tests/replication/http/test__base.py +++ b/tests/replication/http/test__base.py @@ -18,11 +18,12 @@ from typing import Tuple from twisted.web.server import Request from synapse.api.errors import Codes -from synapse.http.server import JsonResource, cancellable +from synapse.http.server import JsonResource from synapse.replication.http import REPLICATION_PREFIX from synapse.replication.http._base import ReplicationEndpoint from synapse.server import HomeServer from synapse.types import JsonDict +from synapse.util.cancellation import cancellable from tests import unittest from tests.http.server._base import test_disconnect diff --git a/tests/test_server.py b/tests/test_server.py index d2b2d8344a..23975d59c3 100644 --- a/tests/test_server.py +++ b/tests/test_server.py @@ -26,12 +26,12 @@ from synapse.http.server import ( DirectServeJsonResource, JsonResource, OptionsResource, - cancellable, ) from synapse.http.site import SynapseRequest, SynapseSite from synapse.logging.context import make_deferred_yieldable from synapse.types import JsonDict from synapse.util import Clock +from synapse.util.cancellation import cancellable from tests import unittest from tests.http.server._base import test_disconnect -- cgit 1.5.1 From 42b11d5565ed026c7d71f433c69e7b7007f45918 Mon Sep 17 00:00:00 2001 From: Nick Mills-Barrett Date: Wed, 31 Aug 2022 12:19:39 +0100 Subject: Remove cached wrap on `_get_joined_users_from_context` method (#13569) The method doesn't actually do any data fetching and the method that does, `_get_joined_profile_from_event_id`, has its own cache. 
Signed off by Nick @ Beeper (@Fizzadar). --- changelog.d/13569.removal | 1 + synapse/state/__init__.py | 2 +- synapse/storage/databases/main/roommember.py | 122 +++++++++------------------ 3 files changed, 40 insertions(+), 85 deletions(-) create mode 100644 changelog.d/13569.removal (limited to 'synapse') diff --git a/changelog.d/13569.removal b/changelog.d/13569.removal new file mode 100644 index 0000000000..af9d407671 --- /dev/null +++ b/changelog.d/13569.removal @@ -0,0 +1 @@ +Remove redundant `_get_joined_users_from_context` cache. Contributed by Nick @ Beeper (@fizzadar). diff --git a/synapse/state/__init__.py b/synapse/state/__init__.py index 3047e1b1ad..3787d35b24 100644 --- a/synapse/state/__init__.py +++ b/synapse/state/__init__.py @@ -232,7 +232,7 @@ class StateHandler: logger.debug("calling resolve_state_groups from get_current_user_ids_in_room") entry = await self.resolve_state_groups_for_events(room_id, latest_event_ids) state = await entry.get_state(self._state_storage_controller, StateFilter.all()) - return await self.store.get_joined_user_ids_from_state(room_id, state, entry) + return await self.store.get_joined_user_ids_from_state(room_id, state) async def get_hosts_in_room_at_events( self, room_id: str, event_ids: Collection[str] diff --git a/synapse/storage/databases/main/roommember.py b/synapse/storage/databases/main/roommember.py index 06500457bd..4f0adb136a 100644 --- a/synapse/storage/databases/main/roommember.py +++ b/synapse/storage/databases/main/roommember.py @@ -31,7 +31,6 @@ from typing import ( import attr from synapse.api.constants import EventTypes, Membership -from synapse.events import EventBase from synapse.metrics import LaterGauge from synapse.metrics.background_process_metrics import ( run_as_background_process, @@ -883,96 +882,51 @@ class RoomMemberWorkerStore(EventsWorkerStore): return shared_room_ids or frozenset() async def get_joined_user_ids_from_state( - self, room_id: str, state: StateMap[str], state_entry: "_StateCacheEntry" + self, room_id: str, state: StateMap[str] ) -> Set[str]: - state_group: Union[object, int] = state_entry.state_group - if not state_group: - # If state_group is None it means it has yet to be assigned a - # state group, i.e. we need to make sure that calls with a state_group - # of None don't hit previous cached calls with a None state_group. - # To do this we set the state_group to a new object as object() != object() - state_group = object() - - assert state_group is not None - with Measure(self._clock, "get_joined_users_from_state"): - return await self._get_joined_user_ids_from_context( - room_id, state_group, state, context=state_entry - ) + """ + For a given set of state IDs, get a set of user IDs in the room. - @cached(num_args=2, iterable=True, max_entries=100000) - async def _get_joined_user_ids_from_context( - self, - room_id: str, - state_group: Union[object, int], - current_state_ids: StateMap[str], - event: Optional[EventBase] = None, - context: Optional["_StateCacheEntry"] = None, - ) -> Set[str]: - # We don't use `state_group`, it's there so that we can cache based - # on it. However, it's important that it's never None, since two current_states - # with a state_group of None are likely to be different. - assert state_group is not None + This method checks the local event cache, before calling + `_get_user_ids_from_membership_event_ids` for any uncached events. 
+ """ - users_in_room = set() - member_event_ids = [ - e_id - for key, e_id in current_state_ids.items() - if key[0] == EventTypes.Member - ] - - if context is not None: - # If we have a context with a delta from a previous state group, - # check if we also have the result from the previous group in cache. - # If we do then we can reuse that result and simply update it with - # any membership changes in `delta_ids` - if context.prev_group and context.delta_ids: - prev_res = self._get_joined_user_ids_from_context.cache.get_immediate( - (room_id, context.prev_group), None - ) - if prev_res and isinstance(prev_res, set): - users_in_room = prev_res - member_event_ids = [ - e_id - for key, e_id in context.delta_ids.items() - if key[0] == EventTypes.Member - ] - for etype, state_key in context.delta_ids: - if etype == EventTypes.Member: - users_in_room.discard(state_key) - - # We check if we have any of the member event ids in the event cache - # before we ask the DB - - # We don't update the event cache hit ratio as it completely throws off - # the hit ratio counts. After all, we don't populate the cache if we - # miss it here - event_map = self._get_events_from_local_cache( - member_event_ids, update_metrics=False - ) + with Measure(self._clock, "get_joined_user_ids_from_state"): + users_in_room = set() + member_event_ids = [ + e_id for key, e_id in state.items() if key[0] == EventTypes.Member + ] - missing_member_event_ids = [] - for event_id in member_event_ids: - ev_entry = event_map.get(event_id) - if ev_entry and not ev_entry.event.rejected_reason: - if ev_entry.event.membership == Membership.JOIN: - users_in_room.add(ev_entry.event.state_key) - else: - missing_member_event_ids.append(event_id) + # We check if we have any of the member event ids in the event cache + # before we ask the DB - if missing_member_event_ids: - event_to_memberships = await self._get_user_ids_from_membership_event_ids( - missing_member_event_ids - ) - users_in_room.update( - user_id for user_id in event_to_memberships.values() if user_id + # We don't update the event cache hit ratio as it completely throws off + # the hit ratio counts. After all, we don't populate the cache if we + # miss it here + event_map = self._get_events_from_local_cache( + member_event_ids, update_metrics=False ) - if event is not None and event.type == EventTypes.Member: - if event.membership == Membership.JOIN: - if event.event_id in member_event_ids: - users_in_room.add(event.state_key) + missing_member_event_ids = [] + for event_id in member_event_ids: + ev_entry = event_map.get(event_id) + if ev_entry and not ev_entry.event.rejected_reason: + if ev_entry.event.membership == Membership.JOIN: + users_in_room.add(ev_entry.event.state_key) + else: + missing_member_event_ids.append(event_id) + + if missing_member_event_ids: + event_to_memberships = ( + await self._get_user_ids_from_membership_event_ids( + missing_member_event_ids + ) + ) + users_in_room.update( + user_id for user_id in event_to_memberships.values() if user_id + ) - return users_in_room + return users_in_room @cached( max_entries=10000, @@ -1205,7 +1159,7 @@ class RoomMemberWorkerStore(EventsWorkerStore): # The cache doesn't match the state group or prev state group, # so we calculate the result from first principles. 
joined_user_ids = await self.get_joined_user_ids_from_state( - room_id, state, state_entry + room_id, state ) cache.hosts_to_joined_users = {} -- cgit 1.5.1 From 84ddcd7bbfe4100101741a408a91f283a8f742c7 Mon Sep 17 00:00:00 2001 From: Jacek Kuśnierz Date: Wed, 31 Aug 2022 14:10:25 +0200 Subject: Drop support for calling `/_matrix/client/v3/rooms/{roomId}/invite` without an `id_access_token` (#13241) Fixes #13206 Signed-off-by: Jacek Kusnierz jacek.kusnierz@tum.de --- changelog.d/13241.removal | 1 + synapse/handlers/identity.py | 142 +++++------------------------- synapse/handlers/room.py | 20 ++++- synapse/handlers/room_member.py | 6 +- synapse/rest/client/room.py | 20 +++-- synapse/rest/media/v1/media_repository.py | 1 - tests/rest/client/test_identity.py | 3 +- tests/rest/client/test_rooms.py | 18 ++++ tests/rest/client/test_shadow_banned.py | 7 +- 9 files changed, 81 insertions(+), 137 deletions(-) create mode 100644 changelog.d/13241.removal (limited to 'synapse') diff --git a/changelog.d/13241.removal b/changelog.d/13241.removal new file mode 100644 index 0000000000..60b0e7969c --- /dev/null +++ b/changelog.d/13241.removal @@ -0,0 +1 @@ +Drop support for calling `/_matrix/client/v3/rooms/{roomId}/invite` without an `id_access_token`, which was not permitted by the spec. Contributed by @Vetchu. \ No newline at end of file diff --git a/synapse/handlers/identity.py b/synapse/handlers/identity.py index 9571d461c8..93d09e9939 100644 --- a/synapse/handlers/identity.py +++ b/synapse/handlers/identity.py @@ -538,11 +538,7 @@ class IdentityHandler: raise SynapseError(400, "Error contacting the identity server") async def lookup_3pid( - self, - id_server: str, - medium: str, - address: str, - id_access_token: Optional[str] = None, + self, id_server: str, medium: str, address: str, id_access_token: str ) -> Optional[str]: """Looks up a 3pid in the passed identity server. @@ -557,60 +553,15 @@ class IdentityHandler: Returns: the matrix ID of the 3pid, or None if it is not recognized. """ - if id_access_token is not None: - try: - results = await self._lookup_3pid_v2( - id_server, id_access_token, medium, address - ) - return results - - except Exception as e: - # Catch HttpResponseExcept for a non-200 response code - # Check if this identity server does not know about v2 lookups - if isinstance(e, HttpResponseException) and e.code == 404: - # This is an old identity server that does not yet support v2 lookups - logger.warning( - "Attempted v2 lookup on v1 identity server %s. Falling " - "back to v1", - id_server, - ) - else: - logger.warning("Error when looking up hashing details: %s", e) - return None - - return await self._lookup_3pid_v1(id_server, medium, address) - - async def _lookup_3pid_v1( - self, id_server: str, medium: str, address: str - ) -> Optional[str]: - """Looks up a 3pid in the passed identity server using v1 lookup. - Args: - id_server: The server name (including port, if required) - of the identity server to use. - medium: The type of the third party identifier (e.g. "email"). - address: The third party identifier (e.g. "foo@example.com"). - - Returns: - the matrix ID of the 3pid, or None if it is not recognized. 
- """ try: - data = await self.blacklisting_http_client.get_json( - "%s%s/_matrix/identity/api/v1/lookup" % (id_server_scheme, id_server), - {"medium": medium, "address": address}, + results = await self._lookup_3pid_v2( + id_server, id_access_token, medium, address ) - - if "mxid" in data: - # note: we used to verify the identity server's signature here, but no longer - # require or validate it. See the following for context: - # https://github.com/matrix-org/synapse/issues/5253#issuecomment-666246950 - return data["mxid"] - except RequestTimedOutError: - raise SynapseError(500, "Timed out contacting identity server") - except OSError as e: - logger.warning("Error from v1 identity server lookup: %s" % (e,)) - - return None + return results + except Exception as e: + logger.warning("Error when looking up hashing details: %s", e) + return None async def _lookup_3pid_v2( self, id_server: str, id_access_token: str, medium: str, address: str @@ -739,7 +690,7 @@ class IdentityHandler: room_type: Optional[str], inviter_display_name: str, inviter_avatar_url: str, - id_access_token: Optional[str] = None, + id_access_token: str, ) -> Tuple[str, List[Dict[str, str]], Dict[str, str], str]: """ Asks an identity server for a third party invite. @@ -760,7 +711,7 @@ class IdentityHandler: inviter_display_name: The current display name of the inviter. inviter_avatar_url: The URL of the inviter's avatar. - id_access_token (str|None): The access token to authenticate to the identity + id_access_token (str): The access token to authenticate to the identity server with Returns: @@ -792,71 +743,24 @@ class IdentityHandler: invite_config["org.matrix.web_client_location"] = self._web_client_location # Add the identity service access token to the JSON body and use the v2 - # Identity Service endpoints if id_access_token is present + # Identity Service endpoints data = None - base_url = "%s%s/_matrix/identity" % (id_server_scheme, id_server) - if id_access_token: - key_validity_url = "%s%s/_matrix/identity/v2/pubkey/isvalid" % ( - id_server_scheme, - id_server, - ) + key_validity_url = "%s%s/_matrix/identity/v2/pubkey/isvalid" % ( + id_server_scheme, + id_server, + ) - # Attempt a v2 lookup - url = base_url + "/v2/store-invite" - try: - data = await self.blacklisting_http_client.post_json_get_json( - url, - invite_config, - {"Authorization": create_id_access_token_header(id_access_token)}, - ) - except RequestTimedOutError: - raise SynapseError(500, "Timed out contacting identity server") - except HttpResponseException as e: - if e.code != 404: - logger.info("Failed to POST %s with JSON: %s", url, e) - raise e - - if data is None: - key_validity_url = "%s%s/_matrix/identity/api/v1/pubkey/isvalid" % ( - id_server_scheme, - id_server, + url = "%s%s/_matrix/identity/v2/store-invite" % (id_server_scheme, id_server) + try: + data = await self.blacklisting_http_client.post_json_get_json( + url, + invite_config, + {"Authorization": create_id_access_token_header(id_access_token)}, ) - url = base_url + "/api/v1/store-invite" - - try: - data = await self.blacklisting_http_client.post_json_get_json( - url, invite_config - ) - except RequestTimedOutError: - raise SynapseError(500, "Timed out contacting identity server") - except HttpResponseException as e: - logger.warning( - "Error trying to call /store-invite on %s%s: %s", - id_server_scheme, - id_server, - e, - ) - - if data is None: - # Some identity servers may only support application/x-www-form-urlencoded - # types. 
This is especially true with old instances of Sydent, see - # https://github.com/matrix-org/sydent/pull/170 - try: - data = await self.blacklisting_http_client.post_urlencoded_get_json( - url, invite_config - ) - except HttpResponseException as e: - logger.warning( - "Error calling /store-invite on %s%s with fallback " - "encoding: %s", - id_server_scheme, - id_server, - e, - ) - raise e - - # TODO: Check for success + except RequestTimedOutError: + raise SynapseError(500, "Timed out contacting identity server") + token = data["token"] public_keys = data.get("public_keys", []) if "public_key" in data: diff --git a/synapse/handlers/room.py b/synapse/handlers/room.py index f64a8690a5..33e9a87002 100644 --- a/synapse/handlers/room.py +++ b/synapse/handlers/room.py @@ -19,6 +19,7 @@ import math import random import string from collections import OrderedDict +from http import HTTPStatus from typing import ( TYPE_CHECKING, Any, @@ -704,8 +705,8 @@ class RoomCreationHandler: was, requested, `room_alias`. Secondly, the stream_id of the last persisted event. Raises: - SynapseError if the room ID couldn't be stored, or something went - horribly wrong. + SynapseError if the room ID couldn't be stored, 3pid invitation config + validation failed, or something went horribly wrong. ResourceLimitError if server is blocked to some resource being exceeded """ @@ -731,6 +732,19 @@ class RoomCreationHandler: invite_3pid_list = config.get("invite_3pid", []) invite_list = config.get("invite", []) + # validate each entry for correctness + for invite_3pid in invite_3pid_list: + if not all( + key in invite_3pid + for key in ("medium", "address", "id_server", "id_access_token") + ): + raise SynapseError( + HTTPStatus.BAD_REQUEST, + "all of `medium`, `address`, `id_server` and `id_access_token` " + "are required when making a 3pid invite", + Codes.MISSING_PARAM, + ) + if not is_requester_admin: spam_check = await self.spam_checker.user_may_create_room(user_id) if spam_check != NOT_SPAM: @@ -978,7 +992,7 @@ class RoomCreationHandler: for invite_3pid in invite_3pid_list: id_server = invite_3pid["id_server"] - id_access_token = invite_3pid.get("id_access_token") # optional + id_access_token = invite_3pid["id_access_token"] address = invite_3pid["address"] medium = invite_3pid["medium"] # Note that do_3pid_invite can raise a ShadowBanError, but this was diff --git a/synapse/handlers/room_member.py b/synapse/handlers/room_member.py index e726997d83..5d4adf5bfd 100644 --- a/synapse/handlers/room_member.py +++ b/synapse/handlers/room_member.py @@ -1382,7 +1382,7 @@ class RoomMemberHandler(metaclass=abc.ABCMeta): id_server: str, requester: Requester, txn_id: Optional[str], - id_access_token: Optional[str] = None, + id_access_token: str, prev_event_ids: Optional[List[str]] = None, depth: Optional[int] = None, ) -> Tuple[str, int]: @@ -1397,7 +1397,7 @@ class RoomMemberHandler(metaclass=abc.ABCMeta): requester: The user making the request. txn_id: The transaction ID this is part of, or None if this is not part of a transaction. - id_access_token: The optional identity server access token. + id_access_token: Identity server access token. depth: Override the depth used to order the event in the DAG. 
prev_event_ids: The event IDs to use as the prev events Should normally be set to None, which will cause the depth to be calculated @@ -1494,7 +1494,7 @@ class RoomMemberHandler(metaclass=abc.ABCMeta): room_id: str, user: UserID, txn_id: Optional[str], - id_access_token: Optional[str] = None, + id_access_token: str, prev_event_ids: Optional[List[str]] = None, depth: Optional[int] = None, ) -> Tuple[EventBase, int]: diff --git a/synapse/rest/client/room.py b/synapse/rest/client/room.py index 0e2834008e..0bca012535 100644 --- a/synapse/rest/client/room.py +++ b/synapse/rest/client/room.py @@ -17,6 +17,7 @@ import logging import re from enum import Enum +from http import HTTPStatus from typing import TYPE_CHECKING, Awaitable, Dict, List, Optional, Tuple from urllib import parse as urlparse @@ -947,7 +948,16 @@ class RoomMembershipRestServlet(TransactionRestServlet): # cheekily send invalid bodies. content = {} - if membership_action == "invite" and self._has_3pid_invite_keys(content): + if membership_action == "invite" and all( + key in content for key in ("medium", "address") + ): + if not all(key in content for key in ("id_server", "id_access_token")): + raise SynapseError( + HTTPStatus.BAD_REQUEST, + "`id_server` and `id_access_token` are required when doing 3pid invite", + Codes.MISSING_PARAM, + ) + try: await self.room_member_handler.do_3pid_invite( room_id, @@ -957,7 +967,7 @@ class RoomMembershipRestServlet(TransactionRestServlet): content["id_server"], requester, txn_id, - content.get("id_access_token"), + content["id_access_token"], ) except ShadowBanError: # Pretend the request succeeded. @@ -994,12 +1004,6 @@ class RoomMembershipRestServlet(TransactionRestServlet): return 200, return_value - def _has_3pid_invite_keys(self, content: JsonDict) -> bool: - for key in {"id_server", "medium", "address"}: - if key not in content: - return False - return True - def on_PUT( self, request: SynapseRequest, room_id: str, membership_action: str, txn_id: str ) -> Awaitable[Tuple[int, JsonDict]]: diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index 7435fd9130..9dd3c8d4bb 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -64,7 +64,6 @@ if TYPE_CHECKING: logger = logging.getLogger(__name__) - # How often to run the background job to update the "recently accessed" # attribute of local and remote media. 
UPDATE_RECENTLY_ACCESSED_TS = 60 * 1000 # 1 minute diff --git a/tests/rest/client/test_identity.py b/tests/rest/client/test_identity.py index dc17c9d113..b0c8215744 100644 --- a/tests/rest/client/test_identity.py +++ b/tests/rest/client/test_identity.py @@ -25,7 +25,6 @@ from tests import unittest class IdentityTestCase(unittest.HomeserverTestCase): - servlets = [ synapse.rest.admin.register_servlets_for_client_rest_resource, room.register_servlets, @@ -33,7 +32,6 @@ class IdentityTestCase(unittest.HomeserverTestCase): ] def make_homeserver(self, reactor: MemoryReactor, clock: Clock) -> HomeServer: - config = self.default_config() config["enable_3pid_lookup"] = False self.hs = self.setup_test_homeserver(config=config) @@ -54,6 +52,7 @@ class IdentityTestCase(unittest.HomeserverTestCase): "id_server": "testis", "medium": "email", "address": "test@example.com", + "id_access_token": tok, } request_url = ("/rooms/%s/invite" % (room_id)).encode("ascii") channel = self.make_request( diff --git a/tests/rest/client/test_rooms.py b/tests/rest/client/test_rooms.py index aa2f578441..c7eb88d33f 100644 --- a/tests/rest/client/test_rooms.py +++ b/tests/rest/client/test_rooms.py @@ -3461,3 +3461,21 @@ class ThreepidInviteTestCase(unittest.HomeserverTestCase): # Also check that it stopped before calling _make_and_store_3pid_invite. make_invite_mock.assert_called_once() + + def test_400_missing_param_without_id_access_token(self) -> None: + """ + Test that a 3pid invite request returns 400 M_MISSING_PARAM + if we do not include id_access_token. + """ + channel = self.make_request( + method="POST", + path="/rooms/" + self.room_id + "/invite", + content={ + "id_server": "example.com", + "medium": "email", + "address": "teresa@example.com", + }, + access_token=self.tok, + ) + self.assertEqual(channel.code, 400) + self.assertEqual(channel.json_body["errcode"], "M_MISSING_PARAM") diff --git a/tests/rest/client/test_shadow_banned.py b/tests/rest/client/test_shadow_banned.py index c50f034b34..c807a37bc2 100644 --- a/tests/rest/client/test_shadow_banned.py +++ b/tests/rest/client/test_shadow_banned.py @@ -97,7 +97,12 @@ class RoomTestCase(_ShadowBannedBase): channel = self.make_request( "POST", "/rooms/%s/invite" % (room_id,), - {"id_server": "test", "medium": "email", "address": "test@test.test"}, + { + "id_server": "test", + "medium": "email", + "address": "test@test.test", + "id_access_token": "anytoken", + }, access_token=self.banned_access_token, ) self.assertEqual(200, channel.code, channel.result) -- cgit 1.5.1 From 0e99f07952edcb6396654e34da50ddeb0a211067 Mon Sep 17 00:00:00 2001 From: Šimon Brandner Date: Thu, 1 Sep 2022 14:31:54 +0200 Subject: Remove support for unstable private read receipts (#13653) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Šimon Brandner --- changelog.d/13653.removal | 1 + synapse/api/constants.py | 1 - synapse/config/experimental.py | 3 -- synapse/handlers/receipts.py | 29 +++---------- synapse/replication/tcp/client.py | 5 +-- synapse/rest/client/notifications.py | 1 - synapse/rest/client/read_marker.py | 2 - synapse/rest/client/receipts.py | 2 - synapse/rest/client/versions.py | 1 - .../storage/databases/main/event_push_actions.py | 2 - tests/handlers/test_receipts.py | 48 ++++++---------------- tests/rest/client/test_sync.py | 37 +++++------------ tests/storage/test_receipts.py | 34 ++++++--------- 13 files changed, 44 insertions(+), 122 deletions(-) create mode 100644 changelog.d/13653.removal (limited to 'synapse') 
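As a client-facing summary of the change below, a hedged sketch: after this removal only the stable `m.read.private` receipt type is accepted by the receipt endpoints, while the unstable `org.matrix.msc2285.read.private` type is rejected. The homeserver URL, room and event IDs, and access token here are placeholders.

    import requests
    from urllib.parse import quote

    HOMESERVER = "https://matrix.example.org"  # placeholder
    ROOM_ID = "!room:example.org"              # placeholder
    EVENT_ID = "$someevent"                    # placeholder
    TOKEN = "syt_placeholder"                  # placeholder

    def send_private_receipt(receipt_type: str) -> int:
        resp = requests.post(
            f"{HOMESERVER}/_matrix/client/v3/rooms/{quote(ROOM_ID)}"
            f"/receipt/{receipt_type}/{quote(EVENT_ID)}",
            headers={"Authorization": f"Bearer {TOKEN}"},
            json={},
        )
        return resp.status_code

    print(send_private_receipt("m.read.private"))                   # expected 200 with this change
    print(send_private_receipt("org.matrix.msc2285.read.private"))  # expected 400 with this change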
diff --git a/changelog.d/13653.removal b/changelog.d/13653.removal new file mode 100644 index 0000000000..eb075d4517 --- /dev/null +++ b/changelog.d/13653.removal @@ -0,0 +1 @@ +Remove support for unstable [private read receipts](https://github.com/matrix-org/matrix-spec-proposals/pull/2285). diff --git a/synapse/api/constants.py b/synapse/api/constants.py index c73aea622a..c178ddf070 100644 --- a/synapse/api/constants.py +++ b/synapse/api/constants.py @@ -258,7 +258,6 @@ class GuestAccess: class ReceiptTypes: READ: Final = "m.read" READ_PRIVATE: Final = "m.read.private" - UNSTABLE_READ_PRIVATE: Final = "org.matrix.msc2285.read.private" FULLY_READ: Final = "m.fully_read" diff --git a/synapse/config/experimental.py b/synapse/config/experimental.py index c1ff417539..260db49cad 100644 --- a/synapse/config/experimental.py +++ b/synapse/config/experimental.py @@ -32,9 +32,6 @@ class ExperimentalConfig(Config): # MSC2716 (importing historical messages) self.msc2716_enabled: bool = experimental.get("msc2716_enabled", False) - # MSC2285 (unstable private read receipts) - self.msc2285_enabled: bool = experimental.get("msc2285_enabled", False) - # MSC3244 (room version capabilities) self.msc3244_enabled: bool = experimental.get("msc3244_enabled", True) diff --git a/synapse/handlers/receipts.py b/synapse/handlers/receipts.py index d4a866b346..d2bdb9c8be 100644 --- a/synapse/handlers/receipts.py +++ b/synapse/handlers/receipts.py @@ -163,10 +163,7 @@ class ReceiptsHandler: if not is_new: return - if self.federation_sender and receipt_type not in ( - ReceiptTypes.READ_PRIVATE, - ReceiptTypes.UNSTABLE_READ_PRIVATE, - ): + if self.federation_sender and receipt_type != ReceiptTypes.READ_PRIVATE: await self.federation_sender.send_read_receipt(receipt) @@ -206,38 +203,24 @@ class ReceiptEventSource(EventSource[int, JsonDict]): for event_id, orig_event_content in room.get("content", {}).items(): event_content = orig_event_content # If there are private read receipts, additional logic is necessary. - if ( - ReceiptTypes.READ_PRIVATE in event_content - or ReceiptTypes.UNSTABLE_READ_PRIVATE in event_content - ): + if ReceiptTypes.READ_PRIVATE in event_content: # Make a copy without private read receipts to avoid leaking # other user's private read receipts.. event_content = { receipt_type: receipt_value for receipt_type, receipt_value in event_content.items() - if receipt_type - not in ( - ReceiptTypes.READ_PRIVATE, - ReceiptTypes.UNSTABLE_READ_PRIVATE, - ) + if receipt_type != ReceiptTypes.READ_PRIVATE } # Copy the current user's private read receipt from the # original content, if it exists. - user_private_read_receipt = orig_event_content.get( - ReceiptTypes.READ_PRIVATE, {} - ).get(user_id, None) + user_private_read_receipt = orig_event_content[ + ReceiptTypes.READ_PRIVATE + ].get(user_id, None) if user_private_read_receipt: event_content[ReceiptTypes.READ_PRIVATE] = { user_id: user_private_read_receipt } - user_unstable_private_read_receipt = orig_event_content.get( - ReceiptTypes.UNSTABLE_READ_PRIVATE, {} - ).get(user_id, None) - if user_unstable_private_read_receipt: - event_content[ReceiptTypes.UNSTABLE_READ_PRIVATE] = { - user_id: user_unstable_private_read_receipt - } # Include the event if there is at least one non-private read # receipt or the current user has a private read receipt. 
diff --git a/synapse/replication/tcp/client.py b/synapse/replication/tcp/client.py index 1ed7230e32..e4f2201c92 100644 --- a/synapse/replication/tcp/client.py +++ b/synapse/replication/tcp/client.py @@ -416,10 +416,7 @@ class FederationSenderHandler: if not self._is_mine_id(receipt.user_id): continue # Private read receipts never get sent over federation. - if receipt.receipt_type in ( - ReceiptTypes.READ_PRIVATE, - ReceiptTypes.UNSTABLE_READ_PRIVATE, - ): + if receipt.receipt_type == ReceiptTypes.READ_PRIVATE: continue receipt_info = ReadReceipt( receipt.room_id, diff --git a/synapse/rest/client/notifications.py b/synapse/rest/client/notifications.py index a73322a6a4..61268e3af1 100644 --- a/synapse/rest/client/notifications.py +++ b/synapse/rest/client/notifications.py @@ -62,7 +62,6 @@ class NotificationsServlet(RestServlet): [ ReceiptTypes.READ, ReceiptTypes.READ_PRIVATE, - ReceiptTypes.UNSTABLE_READ_PRIVATE, ], ) diff --git a/synapse/rest/client/read_marker.py b/synapse/rest/client/read_marker.py index aaad8b233f..5e53096539 100644 --- a/synapse/rest/client/read_marker.py +++ b/synapse/rest/client/read_marker.py @@ -45,8 +45,6 @@ class ReadMarkerRestServlet(RestServlet): ReceiptTypes.FULLY_READ, ReceiptTypes.READ_PRIVATE, } - if hs.config.experimental.msc2285_enabled: - self._known_receipt_types.add(ReceiptTypes.UNSTABLE_READ_PRIVATE) async def on_POST( self, request: SynapseRequest, room_id: str diff --git a/synapse/rest/client/receipts.py b/synapse/rest/client/receipts.py index c6108fc5eb..5b7fad7402 100644 --- a/synapse/rest/client/receipts.py +++ b/synapse/rest/client/receipts.py @@ -49,8 +49,6 @@ class ReceiptRestServlet(RestServlet): ReceiptTypes.READ_PRIVATE, ReceiptTypes.FULLY_READ, } - if hs.config.experimental.msc2285_enabled: - self._known_receipt_types.add(ReceiptTypes.UNSTABLE_READ_PRIVATE) async def on_POST( self, request: SynapseRequest, room_id: str, receipt_type: str, event_id: str diff --git a/synapse/rest/client/versions.py b/synapse/rest/client/versions.py index c9a830cbac..c516cda95d 100644 --- a/synapse/rest/client/versions.py +++ b/synapse/rest/client/versions.py @@ -95,7 +95,6 @@ class VersionsRestServlet(RestServlet): "org.matrix.msc3026.busy_presence": self.config.experimental.msc3026_enabled, # Supports receiving private read receipts as per MSC2285 "org.matrix.msc2285.stable": True, # TODO: Remove when MSC2285 becomes a part of the spec - "org.matrix.msc2285": self.config.experimental.msc2285_enabled, # Supports filtering of /publicRooms by room type as per MSC3827 "org.matrix.msc3827.stable": True, # Adds support for importing historical messages as per MSC2716 diff --git a/synapse/storage/databases/main/event_push_actions.py b/synapse/storage/databases/main/event_push_actions.py index 9f410d69de..f4a07de2a3 100644 --- a/synapse/storage/databases/main/event_push_actions.py +++ b/synapse/storage/databases/main/event_push_actions.py @@ -274,7 +274,6 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas receipt_types=( ReceiptTypes.READ, ReceiptTypes.READ_PRIVATE, - ReceiptTypes.UNSTABLE_READ_PRIVATE, ), ) @@ -468,7 +467,6 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas ( ReceiptTypes.READ, ReceiptTypes.READ_PRIVATE, - ReceiptTypes.UNSTABLE_READ_PRIVATE, ), ) diff --git a/tests/handlers/test_receipts.py b/tests/handlers/test_receipts.py index 5f70a2db79..b55238650c 100644 --- a/tests/handlers/test_receipts.py +++ b/tests/handlers/test_receipts.py @@ -15,8 +15,6 @@ from copy import deepcopy from 
typing import List -from parameterized import parameterized - from synapse.api.constants import EduTypes, ReceiptTypes from synapse.types import JsonDict @@ -27,16 +25,13 @@ class ReceiptsTestCase(unittest.HomeserverTestCase): def prepare(self, reactor, clock, hs): self.event_source = hs.get_event_sources().sources.receipt - @parameterized.expand( - [ReceiptTypes.READ_PRIVATE, ReceiptTypes.UNSTABLE_READ_PRIVATE] - ) - def test_filters_out_private_receipt(self, receipt_type: str) -> None: + def test_filters_out_private_receipt(self) -> None: self._test_filters_private( [ { "content": { "$1435641916114394fHBLK:matrix.org": { - receipt_type: { + ReceiptTypes.READ_PRIVATE: { "@rikj:jki.re": { "ts": 1436451550453, } @@ -50,18 +45,13 @@ class ReceiptsTestCase(unittest.HomeserverTestCase): [], ) - @parameterized.expand( - [ReceiptTypes.READ_PRIVATE, ReceiptTypes.UNSTABLE_READ_PRIVATE] - ) - def test_filters_out_private_receipt_and_ignores_rest( - self, receipt_type: str - ) -> None: + def test_filters_out_private_receipt_and_ignores_rest(self) -> None: self._test_filters_private( [ { "content": { "$1dgdgrd5641916114394fHBLK:matrix.org": { - receipt_type: { + ReceiptTypes.READ_PRIVATE: { "@rikj:jki.re": { "ts": 1436451550453, }, @@ -94,18 +84,15 @@ class ReceiptsTestCase(unittest.HomeserverTestCase): ], ) - @parameterized.expand( - [ReceiptTypes.READ_PRIVATE, ReceiptTypes.UNSTABLE_READ_PRIVATE] - ) def test_filters_out_event_with_only_private_receipts_and_ignores_the_rest( - self, receipt_type: str + self, ) -> None: self._test_filters_private( [ { "content": { "$14356419edgd14394fHBLK:matrix.org": { - receipt_type: { + ReceiptTypes.READ_PRIVATE: { "@rikj:jki.re": { "ts": 1436451550453, }, @@ -175,18 +162,15 @@ class ReceiptsTestCase(unittest.HomeserverTestCase): ], ) - @parameterized.expand( - [ReceiptTypes.READ_PRIVATE, ReceiptTypes.UNSTABLE_READ_PRIVATE] - ) def test_filters_out_receipt_event_with_only_private_receipt_and_ignores_rest( - self, receipt_type: str + self, ) -> None: self._test_filters_private( [ { "content": { "$14356419edgd14394fHBLK:matrix.org": { - receipt_type: { + ReceiptTypes.READ_PRIVATE: { "@rikj:jki.re": { "ts": 1436451550453, }, @@ -262,16 +246,13 @@ class ReceiptsTestCase(unittest.HomeserverTestCase): ], ) - @parameterized.expand( - [ReceiptTypes.READ_PRIVATE, ReceiptTypes.UNSTABLE_READ_PRIVATE] - ) - def test_leaves_our_private_and_their_public(self, receipt_type: str) -> None: + def test_leaves_our_private_and_their_public(self) -> None: self._test_filters_private( [ { "content": { "$1dgdgrd5641916114394fHBLK:matrix.org": { - receipt_type: { + ReceiptTypes.READ_PRIVATE: { "@me:server.org": { "ts": 1436451550453, }, @@ -296,7 +277,7 @@ class ReceiptsTestCase(unittest.HomeserverTestCase): { "content": { "$1dgdgrd5641916114394fHBLK:matrix.org": { - receipt_type: { + ReceiptTypes.READ_PRIVATE: { "@me:server.org": { "ts": 1436451550453, }, @@ -319,16 +300,13 @@ class ReceiptsTestCase(unittest.HomeserverTestCase): ], ) - @parameterized.expand( - [ReceiptTypes.READ_PRIVATE, ReceiptTypes.UNSTABLE_READ_PRIVATE] - ) - def test_we_do_not_mutate(self, receipt_type: str) -> None: + def test_we_do_not_mutate(self) -> None: """Ensure the input values are not modified.""" events = [ { "content": { "$1435641916114394fHBLK:matrix.org": { - receipt_type: { + ReceiptTypes.READ_PRIVATE: { "@rikj:jki.re": { "ts": 1436451550453, } diff --git a/tests/rest/client/test_sync.py b/tests/rest/client/test_sync.py index de0dec8539..0af643ecd9 100644 --- a/tests/rest/client/test_sync.py +++ 
b/tests/rest/client/test_sync.py @@ -391,7 +391,6 @@ class ReadReceiptsTestCase(unittest.HomeserverTestCase): def make_homeserver(self, reactor: MemoryReactor, clock: Clock) -> HomeServer: config = self.default_config() - config["experimental_features"] = {"msc2285_enabled": True} return self.setup_test_homeserver(config=config) @@ -413,17 +412,14 @@ class ReadReceiptsTestCase(unittest.HomeserverTestCase): # Join the second user self.helper.join(room=self.room_id, user=self.user2, tok=self.tok2) - @parameterized.expand( - [ReceiptTypes.READ_PRIVATE, ReceiptTypes.UNSTABLE_READ_PRIVATE] - ) - def test_private_read_receipts(self, receipt_type: str) -> None: + def test_private_read_receipts(self) -> None: # Send a message as the first user res = self.helper.send(self.room_id, body="hello", tok=self.tok) # Send a private read receipt to tell the server the first user's message was read channel = self.make_request( "POST", - f"/rooms/{self.room_id}/receipt/{receipt_type}/{res['event_id']}", + f"/rooms/{self.room_id}/receipt/{ReceiptTypes.READ_PRIVATE}/{res['event_id']}", {}, access_token=self.tok2, ) @@ -432,10 +428,7 @@ class ReadReceiptsTestCase(unittest.HomeserverTestCase): # Test that the first user can't see the other user's private read receipt self.assertIsNone(self._get_read_receipt()) - @parameterized.expand( - [ReceiptTypes.READ_PRIVATE, ReceiptTypes.UNSTABLE_READ_PRIVATE] - ) - def test_public_receipt_can_override_private(self, receipt_type: str) -> None: + def test_public_receipt_can_override_private(self) -> None: """ Sending a public read receipt to the same event which has a private read receipt should cause that receipt to become public. @@ -446,7 +439,7 @@ class ReadReceiptsTestCase(unittest.HomeserverTestCase): # Send a private read receipt channel = self.make_request( "POST", - f"/rooms/{self.room_id}/receipt/{receipt_type}/{res['event_id']}", + f"/rooms/{self.room_id}/receipt/{ReceiptTypes.READ_PRIVATE}/{res['event_id']}", {}, access_token=self.tok2, ) @@ -465,10 +458,7 @@ class ReadReceiptsTestCase(unittest.HomeserverTestCase): # Test that we did override the private read receipt self.assertNotEqual(self._get_read_receipt(), None) - @parameterized.expand( - [ReceiptTypes.READ_PRIVATE, ReceiptTypes.UNSTABLE_READ_PRIVATE] - ) - def test_private_receipt_cannot_override_public(self, receipt_type: str) -> None: + def test_private_receipt_cannot_override_public(self) -> None: """ Sending a private read receipt to the same event which has a public read receipt should cause no change. @@ -489,7 +479,7 @@ class ReadReceiptsTestCase(unittest.HomeserverTestCase): # Send a private read receipt channel = self.make_request( "POST", - f"/rooms/{self.room_id}/receipt/{receipt_type}/{res['event_id']}", + f"/rooms/{self.room_id}/receipt/{ReceiptTypes.READ_PRIVATE}/{res['event_id']}", {}, access_token=self.tok2, ) @@ -554,7 +544,6 @@ class UnreadMessagesTestCase(unittest.HomeserverTestCase): config = super().default_config() config["experimental_features"] = { "msc2654_enabled": True, - "msc2285_enabled": True, } return config @@ -601,10 +590,7 @@ class UnreadMessagesTestCase(unittest.HomeserverTestCase): tok=self.tok, ) - @parameterized.expand( - [ReceiptTypes.READ_PRIVATE, ReceiptTypes.UNSTABLE_READ_PRIVATE] - ) - def test_unread_counts(self, receipt_type: str) -> None: + def test_unread_counts(self) -> None: """Tests that /sync returns the right value for the unread count (MSC2654).""" # Check that our own messages don't increase the unread count. 
@@ -638,7 +624,7 @@ class UnreadMessagesTestCase(unittest.HomeserverTestCase): # Send a read receipt to tell the server we've read the latest event. channel = self.make_request( "POST", - f"/rooms/{self.room_id}/receipt/{receipt_type}/{res['event_id']}", + f"/rooms/{self.room_id}/receipt/{ReceiptTypes.READ_PRIVATE}/{res['event_id']}", {}, access_token=self.tok, ) @@ -726,7 +712,7 @@ class UnreadMessagesTestCase(unittest.HomeserverTestCase): channel = self.make_request( "POST", - f"/rooms/{self.room_id}/receipt/{receipt_type}/{res2['event_id']}", + f"/rooms/{self.room_id}/receipt/{ReceiptTypes.READ_PRIVATE}/{res2['event_id']}", {}, access_token=self.tok, ) @@ -738,7 +724,6 @@ class UnreadMessagesTestCase(unittest.HomeserverTestCase): [ ReceiptTypes.READ, ReceiptTypes.READ_PRIVATE, - ReceiptTypes.UNSTABLE_READ_PRIVATE, ] ) def test_read_receipts_only_go_down(self, receipt_type: str) -> None: @@ -752,7 +737,7 @@ class UnreadMessagesTestCase(unittest.HomeserverTestCase): # Read last event channel = self.make_request( "POST", - f"/rooms/{self.room_id}/receipt/{receipt_type}/{res2['event_id']}", + f"/rooms/{self.room_id}/receipt/{ReceiptTypes.READ_PRIVATE}/{res2['event_id']}", {}, access_token=self.tok, ) @@ -763,7 +748,7 @@ class UnreadMessagesTestCase(unittest.HomeserverTestCase): # read receipt go up to an older event channel = self.make_request( "POST", - f"/rooms/{self.room_id}/receipt/{receipt_type}/{res1['event_id']}", + f"/rooms/{self.room_id}/receipt/{ReceiptTypes.READ_PRIVATE}/{res1['event_id']}", {}, access_token=self.tok, ) diff --git a/tests/storage/test_receipts.py b/tests/storage/test_receipts.py index 191c957fb5..c89bfff241 100644 --- a/tests/storage/test_receipts.py +++ b/tests/storage/test_receipts.py @@ -12,7 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from parameterized import parameterized from synapse.api.constants import ReceiptTypes from synapse.types import UserID, create_requester @@ -92,7 +91,6 @@ class ReceiptTestCase(HomeserverTestCase): [ ReceiptTypes.READ, ReceiptTypes.READ_PRIVATE, - ReceiptTypes.UNSTABLE_READ_PRIVATE, ], ) ) @@ -104,7 +102,6 @@ class ReceiptTestCase(HomeserverTestCase): [ ReceiptTypes.READ, ReceiptTypes.READ_PRIVATE, - ReceiptTypes.UNSTABLE_READ_PRIVATE, ], ) ) @@ -117,16 +114,12 @@ class ReceiptTestCase(HomeserverTestCase): [ ReceiptTypes.READ, ReceiptTypes.READ_PRIVATE, - ReceiptTypes.UNSTABLE_READ_PRIVATE, ], ) ) self.assertEqual(res, None) - @parameterized.expand( - [ReceiptTypes.READ_PRIVATE, ReceiptTypes.UNSTABLE_READ_PRIVATE] - ) - def test_get_receipts_for_user(self, receipt_type: str) -> None: + def test_get_receipts_for_user(self) -> None: # Send some events into the first room event1_1_id = self.create_and_send_event( self.room_id1, UserID.from_string(OTHER_USER_ID) @@ -144,14 +137,14 @@ class ReceiptTestCase(HomeserverTestCase): # Send private read receipt for the second event self.get_success( self.store.insert_receipt( - self.room_id1, receipt_type, OUR_USER_ID, [event1_2_id], {} + self.room_id1, ReceiptTypes.READ_PRIVATE, OUR_USER_ID, [event1_2_id], {} ) ) # Test we get the latest event when we want both private and public receipts res = self.get_success( self.store.get_receipts_for_user( - OUR_USER_ID, [ReceiptTypes.READ, receipt_type] + OUR_USER_ID, [ReceiptTypes.READ, ReceiptTypes.READ_PRIVATE] ) ) self.assertEqual(res, {self.room_id1: event1_2_id}) @@ -164,7 +157,7 @@ class ReceiptTestCase(HomeserverTestCase): # Test we get the latest event when we want only the public receipt res = self.get_success( - self.store.get_receipts_for_user(OUR_USER_ID, [receipt_type]) + self.store.get_receipts_for_user(OUR_USER_ID, [ReceiptTypes.READ_PRIVATE]) ) self.assertEqual(res, {self.room_id1: event1_2_id}) @@ -187,20 +180,17 @@ class ReceiptTestCase(HomeserverTestCase): # Test new room is reflected in what the method returns self.get_success( self.store.insert_receipt( - self.room_id2, receipt_type, OUR_USER_ID, [event2_1_id], {} + self.room_id2, ReceiptTypes.READ_PRIVATE, OUR_USER_ID, [event2_1_id], {} ) ) res = self.get_success( self.store.get_receipts_for_user( - OUR_USER_ID, [ReceiptTypes.READ, receipt_type] + OUR_USER_ID, [ReceiptTypes.READ, ReceiptTypes.READ_PRIVATE] ) ) self.assertEqual(res, {self.room_id1: event1_2_id, self.room_id2: event2_1_id}) - @parameterized.expand( - [ReceiptTypes.READ_PRIVATE, ReceiptTypes.UNSTABLE_READ_PRIVATE] - ) - def test_get_last_receipt_event_id_for_user(self, receipt_type: str) -> None: + def test_get_last_receipt_event_id_for_user(self) -> None: # Send some events into the first room event1_1_id = self.create_and_send_event( self.room_id1, UserID.from_string(OTHER_USER_ID) @@ -218,7 +208,7 @@ class ReceiptTestCase(HomeserverTestCase): # Send private read receipt for the second event self.get_success( self.store.insert_receipt( - self.room_id1, receipt_type, OUR_USER_ID, [event1_2_id], {} + self.room_id1, ReceiptTypes.READ_PRIVATE, OUR_USER_ID, [event1_2_id], {} ) ) @@ -227,7 +217,7 @@ class ReceiptTestCase(HomeserverTestCase): self.store.get_last_receipt_event_id_for_user( OUR_USER_ID, self.room_id1, - [ReceiptTypes.READ, receipt_type], + [ReceiptTypes.READ, ReceiptTypes.READ_PRIVATE], ) ) self.assertEqual(res, event1_2_id) @@ -243,7 +233,7 @@ class ReceiptTestCase(HomeserverTestCase): # Test we get the latest event when we want only the private receipt res = 
self.get_success( self.store.get_last_receipt_event_id_for_user( - OUR_USER_ID, self.room_id1, [receipt_type] + OUR_USER_ID, self.room_id1, [ReceiptTypes.READ_PRIVATE] ) ) self.assertEqual(res, event1_2_id) @@ -269,14 +259,14 @@ class ReceiptTestCase(HomeserverTestCase): # Test new room is reflected in what the method returns self.get_success( self.store.insert_receipt( - self.room_id2, receipt_type, OUR_USER_ID, [event2_1_id], {} + self.room_id2, ReceiptTypes.READ_PRIVATE, OUR_USER_ID, [event2_1_id], {} ) ) res = self.get_success( self.store.get_last_receipt_event_id_for_user( OUR_USER_ID, self.room_id2, - [ReceiptTypes.READ, receipt_type], + [ReceiptTypes.READ, ReceiptTypes.READ_PRIVATE], ) ) self.assertEqual(res, event2_1_id) -- cgit 1.5.1 From e8130f219b8aebb00441e4979d34eb3b5d3d6f03 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Thu, 1 Sep 2022 13:54:02 +0100 Subject: Return keys for unwhitelisted servers from `/_matrix/key/v2/query` (#13683) --- changelog.d/13683.bugfix | 1 + synapse/rest/key/v2/remote_key_resource.py | 41 +++++++++++++++--------------- 2 files changed, 22 insertions(+), 20 deletions(-) create mode 100644 changelog.d/13683.bugfix (limited to 'synapse') diff --git a/changelog.d/13683.bugfix b/changelog.d/13683.bugfix new file mode 100644 index 0000000000..538534fec1 --- /dev/null +++ b/changelog.d/13683.bugfix @@ -0,0 +1 @@ +Fix a long-standing bug which meant that keys for unwhitelisted servers were not returned by `/_matrix/key/v2/query`. diff --git a/synapse/rest/key/v2/remote_key_resource.py b/synapse/rest/key/v2/remote_key_resource.py index f597157581..7f8ad29566 100644 --- a/synapse/rest/key/v2/remote_key_resource.py +++ b/synapse/rest/key/v2/remote_key_resource.py @@ -135,13 +135,6 @@ class RemoteKey(DirectServeJsonResource): store_queries = [] for server_name, key_ids in query.items(): - if ( - self.federation_domain_whitelist is not None - and server_name not in self.federation_domain_whitelist - ): - logger.debug("Federation denied with %s", server_name) - continue - if not key_ids: key_ids = (None,) for key_id in key_ids: @@ -153,21 +146,28 @@ class RemoteKey(DirectServeJsonResource): time_now_ms = self.clock.time_msec() - # Note that the value is unused. + # Map server_name->key_id->int. Note that the value of the init is unused. + # XXX: why don't we just use a set? cache_misses: Dict[str, Dict[str, int]] = {} for (server_name, key_id, _), key_results in cached.items(): results = [(result["ts_added_ms"], result) for result in key_results] - if not results and key_id is not None: - cache_misses.setdefault(server_name, {})[key_id] = 0 + if key_id is None: + # all keys were requested. Just return what we have without worrying + # about validity + for _, result in results: + # Cast to bytes since postgresql returns a memoryview. + json_results.add(bytes(result["key_json"])) continue - if key_id is not None: + miss = False + if not results: + miss = True + else: ts_added_ms, most_recent_result = max(results) ts_valid_until_ms = most_recent_result["ts_valid_until_ms"] req_key = query.get(server_name, {}).get(key_id, {}) req_valid_until = req_key.get("minimum_valid_until_ts") - miss = False if req_valid_until is not None: if ts_valid_until_ms < req_valid_until: logger.debug( @@ -211,19 +211,20 @@ class RemoteKey(DirectServeJsonResource): ts_valid_until_ms, time_now_ms, ) - - if miss: - cache_misses.setdefault(server_name, {})[key_id] = 0 # Cast to bytes since postgresql returns a memoryview. 
json_results.add(bytes(most_recent_result["key_json"])) - else: - for _, result in results: - # Cast to bytes since postgresql returns a memoryview. - json_results.add(bytes(result["key_json"])) + + if miss and query_remote_on_cache_miss: + # only bother attempting to fetch keys from servers on our whitelist + if ( + self.federation_domain_whitelist is None + or server_name in self.federation_domain_whitelist + ): + cache_misses.setdefault(server_name, {})[key_id] = 0 # If there is a cache miss, request the missing keys, then recurse (and # ensure the result is sent). - if cache_misses and query_remote_on_cache_miss: + if cache_misses: await yieldable_gather_results( lambda t: self.fetcher.get_keys(*t), ( -- cgit 1.5.1 From 2318603772f2e1595f9b39369246d5955a069381 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 1 Sep 2022 14:54:52 +0200 Subject: Add some logging to help track down #13444 (#13679) --- changelog.d/13679.misc | 1 + synapse/federation/sender/__init__.py | 13 +++++++++++++ 2 files changed, 14 insertions(+) create mode 100644 changelog.d/13679.misc (limited to 'synapse') diff --git a/changelog.d/13679.misc b/changelog.d/13679.misc new file mode 100644 index 0000000000..a4fa94da9d --- /dev/null +++ b/changelog.d/13679.misc @@ -0,0 +1 @@ +Add some logging to help track down #13444. diff --git a/synapse/federation/sender/__init__.py b/synapse/federation/sender/__init__.py index 94a65ac65f..8bc60e3e3e 100644 --- a/synapse/federation/sender/__init__.py +++ b/synapse/federation/sender/__init__.py @@ -441,6 +441,19 @@ class FederationSender(AbstractFederationSender): destinations = await self._external_cache.get( "get_joined_hosts", str(sg) ) + if destinations is None: + # Add logging to help track down #13444 + logger.info( + "Unexpectedly did not have cached destinations for %s / %s", + sg, + event.event_id, + ) + else: + # Add logging to help track down #13444 + logger.info( + "Unexpectedly did not have cached prev group for %s", + event.event_id, + ) if destinations is None: try: -- cgit 1.5.1 From 9d2823ab70e69a90f4a54256bd32cd85eab130d9 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 1 Sep 2022 17:07:01 +0200 Subject: Cache `is_partial_state_room` (#13693) Fixes #13613. --- changelog.d/13693.misc | 1 + synapse/storage/databases/main/room.py | 11 +++++++---- 2 files changed, 8 insertions(+), 4 deletions(-) create mode 100644 changelog.d/13693.misc (limited to 'synapse') diff --git a/changelog.d/13693.misc b/changelog.d/13693.misc new file mode 100644 index 0000000000..31490191c9 --- /dev/null +++ b/changelog.d/13693.misc @@ -0,0 +1 @@ +Add cache to `is_partial_state_room`. 
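The `is_partial_state_room` change below pairs a `@cached()` read with explicit invalidation in both write paths. A simplified, self-contained sketch of that idiom follows, using a plain dict in place of Synapse's cache descriptors and replication stream; the names are illustrative only.

    from typing import Dict

    _db: Dict[str, bool] = {"!room:example.org": True}  # room_id -> has partial state
    _cache: Dict[str, bool] = {}                         # memoised answers

    def is_partial_state_room(room_id: str) -> bool:
        # Read path: memoise the lookup, as @cached() does for the storage method.
        if room_id not in _cache:
            _cache[room_id] = _db.get(room_id, False)
        return _cache[room_id]

    def clear_partial_state_room(room_id: str) -> None:
        # Write path: update the source of truth...
        _db[room_id] = False
        # ...and drop the memoised answer, mirroring
        # _invalidate_cache_and_stream(txn, self.is_partial_state_room, (room_id,)),
        # which additionally tells other workers to invalidate their copies.
        _cache.pop(room_id, None)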
diff --git a/synapse/storage/databases/main/room.py b/synapse/storage/databases/main/room.py index 367424b4a8..bef66f1992 100644 --- a/synapse/storage/databases/main/room.py +++ b/synapse/storage/databases/main/room.py @@ -1185,8 +1185,9 @@ class RoomWorkerStore(CacheInvalidationWorkerStore): ) return False - @staticmethod - def _clear_partial_state_room_txn(txn: LoggingTransaction, room_id: str) -> None: + def _clear_partial_state_room_txn( + self, txn: LoggingTransaction, room_id: str + ) -> None: DatabasePool.simple_delete_txn( txn, table="partial_state_rooms_servers", @@ -1197,7 +1198,9 @@ class RoomWorkerStore(CacheInvalidationWorkerStore): table="partial_state_rooms", keyvalues={"room_id": room_id}, ) + self._invalidate_cache_and_stream(txn, self.is_partial_state_room, (room_id,)) + @cached() async def is_partial_state_room(self, room_id: str) -> bool: """Checks if this room has partial state. @@ -1771,9 +1774,8 @@ class RoomStore(RoomBackgroundUpdateStore, RoomWorkerStore): servers, ) - @staticmethod def _store_partial_state_room_txn( - txn: LoggingTransaction, room_id: str, servers: Collection[str] + self, txn: LoggingTransaction, room_id: str, servers: Collection[str] ) -> None: DatabasePool.simple_insert_txn( txn, @@ -1788,6 +1790,7 @@ class RoomStore(RoomBackgroundUpdateStore, RoomWorkerStore): keys=("room_id", "server_name"), values=((room_id, s) for s in servers), ) + self._invalidate_cache_and_stream(txn, self.is_partial_state_room, (room_id,)) async def maybe_store_room_on_outlier_membership( self, room_id: str, room_version: RoomVersion -- cgit 1.5.1 From 390b7ce946173b41a61f427ef25a9a1d0371ad0b Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Thu, 1 Sep 2022 12:52:03 -0400 Subject: Disable calculating unread counts unless the config flag is enabled. (#13694) This avoids doing work that will never be used (since the resulting unread counts will never be sent in a /sync response). The negative of doing this is that unread counts will be incorrect when the feature is initially enabled. --- changelog.d/13694.bugfix | 1 + synapse/config/experimental.py | 3 +++ synapse/push/bulk_push_rule_evaluator.py | 7 +++++- tests/storage/test_event_push_actions.py | 42 +++++++++++++++----------------- 4 files changed, 30 insertions(+), 23 deletions(-) create mode 100644 changelog.d/13694.bugfix (limited to 'synapse') diff --git a/changelog.d/13694.bugfix b/changelog.d/13694.bugfix new file mode 100644 index 0000000000..48b9bb5f0a --- /dev/null +++ b/changelog.d/13694.bugfix @@ -0,0 +1 @@ +Fix a bug introduced in Synapse v1.20.0 that would cause the unstable unread counts from [MSC2654](https://github.com/matrix-org/matrix-spec-proposals/pull/2654) to be calculated even if the feature is disabled. diff --git a/synapse/config/experimental.py b/synapse/config/experimental.py index 260db49cad..702b81e636 100644 --- a/synapse/config/experimental.py +++ b/synapse/config/experimental.py @@ -71,6 +71,9 @@ class ExperimentalConfig(Config): self.msc3720_enabled: bool = experimental.get("msc3720_enabled", False) # MSC2654: Unread counts + # + # Note that enabling this will result in an incorrect unread count for + # previously calculated push actions. 
self.msc2654_enabled: bool = experimental.get("msc2654_enabled", False) # MSC2815 (allow room moderators to view redacted event content) diff --git a/synapse/push/bulk_push_rule_evaluator.py b/synapse/push/bulk_push_rule_evaluator.py index ccd512be54..d1caf8a0f7 100644 --- a/synapse/push/bulk_push_rule_evaluator.py +++ b/synapse/push/bulk_push_rule_evaluator.py @@ -262,7 +262,12 @@ class BulkPushRuleEvaluator: # This can happen due to out of band memberships return - count_as_unread = _should_count_as_unread(event, context) + # Disable counting as unread unless the experimental configuration is + # enabled, as it can cause additional (unwanted) rows to be added to the + # event_push_actions table. + count_as_unread = False + if self.hs.config.experimental.msc2654_enabled: + count_as_unread = _should_count_as_unread(event, context) rules_by_user = await self._get_rules_for_event(event) actions_by_user: Dict[str, Collection[Union[Mapping, str]]] = {} diff --git a/tests/storage/test_event_push_actions.py b/tests/storage/test_event_push_actions.py index 62fd4aeb2f..fc43d7edd1 100644 --- a/tests/storage/test_event_push_actions.py +++ b/tests/storage/test_event_push_actions.py @@ -67,9 +67,7 @@ class EventPushActionsStoreTestCase(HomeserverTestCase): last_event_id: str - def _assert_counts( - noitf_count: int, unread_count: int, highlight_count: int - ) -> None: + def _assert_counts(noitf_count: int, highlight_count: int) -> None: counts = self.get_success( self.store.db_pool.runInteraction( "get-unread-counts", @@ -82,7 +80,7 @@ class EventPushActionsStoreTestCase(HomeserverTestCase): counts, NotifCounts( notify_count=noitf_count, - unread_count=unread_count, + unread_count=0, highlight_count=highlight_count, ), ) @@ -112,27 +110,27 @@ class EventPushActionsStoreTestCase(HomeserverTestCase): ) ) - _assert_counts(0, 0, 0) + _assert_counts(0, 0) _create_event() - _assert_counts(1, 1, 0) + _assert_counts(1, 0) _rotate() - _assert_counts(1, 1, 0) + _assert_counts(1, 0) event_id = _create_event() - _assert_counts(2, 2, 0) + _assert_counts(2, 0) _rotate() - _assert_counts(2, 2, 0) + _assert_counts(2, 0) _create_event() _mark_read(event_id) - _assert_counts(1, 1, 0) + _assert_counts(1, 0) _mark_read(last_event_id) - _assert_counts(0, 0, 0) + _assert_counts(0, 0) _create_event() _rotate() - _assert_counts(1, 1, 0) + _assert_counts(1, 0) # Delete old event push actions, this should not affect the (summarised) count. # @@ -151,35 +149,35 @@ class EventPushActionsStoreTestCase(HomeserverTestCase): ) ) self.assertEqual(result, []) - _assert_counts(1, 1, 0) + _assert_counts(1, 0) _mark_read(last_event_id) - _assert_counts(0, 0, 0) + _assert_counts(0, 0) event_id = _create_event(True) - _assert_counts(1, 1, 1) + _assert_counts(1, 1) _rotate() - _assert_counts(1, 1, 1) + _assert_counts(1, 1) # Check that adding another notification and rotating after highlight # works. _create_event() _rotate() - _assert_counts(2, 2, 1) + _assert_counts(2, 1) # Check that sending read receipts at different points results in the # right counts. 
_mark_read(event_id) - _assert_counts(1, 1, 0) + _assert_counts(1, 0) _mark_read(last_event_id) - _assert_counts(0, 0, 0) + _assert_counts(0, 0) _create_event(True) - _assert_counts(1, 1, 1) + _assert_counts(1, 1) _mark_read(last_event_id) - _assert_counts(0, 0, 0) + _assert_counts(0, 0) _rotate() - _assert_counts(0, 0, 0) + _assert_counts(0, 0) def test_find_first_stream_ordering_after_ts(self) -> None: def add_event(so: int, ts: int) -> None: -- cgit 1.5.1 From 48a5c47a9f7379f3b7e85c5a8dcbae346eb22310 Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Thu, 1 Sep 2022 14:57:47 -0400 Subject: Add a schema delta to drop unstable private read receipts. (#13692) Otherwise they'll be leaked due to the filtering code only respecting the stable identifiers for private read receipts. --- changelog.d/13692.removal | 1 + .../72/05remove_unstable_private_read_receipts.sql | 19 +++++++++++++++++++ 2 files changed, 20 insertions(+) create mode 100644 changelog.d/13692.removal create mode 100644 synapse/storage/schema/main/delta/72/05remove_unstable_private_read_receipts.sql (limited to 'synapse') diff --git a/changelog.d/13692.removal b/changelog.d/13692.removal new file mode 100644 index 0000000000..eb075d4517 --- /dev/null +++ b/changelog.d/13692.removal @@ -0,0 +1 @@ +Remove support for unstable [private read receipts](https://github.com/matrix-org/matrix-spec-proposals/pull/2285). diff --git a/synapse/storage/schema/main/delta/72/05remove_unstable_private_read_receipts.sql b/synapse/storage/schema/main/delta/72/05remove_unstable_private_read_receipts.sql new file mode 100644 index 0000000000..36b41049cd --- /dev/null +++ b/synapse/storage/schema/main/delta/72/05remove_unstable_private_read_receipts.sql @@ -0,0 +1,19 @@ +/* Copyright 2022 The Matrix.org Foundation C.I.C + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +-- Drop previously received private read receipts so they do not accidentally +-- get leaked to other users. 
+DELETE FROM receipts_linearized WHERE receipt_type = 'org.matrix.msc2285.read.private'; +DELETE FROM receipts_graph WHERE receipt_type = 'org.matrix.msc2285.read.private'; -- cgit 1.5.1 From 898fef2789c9b1a20ef53c7d588f536f51f0fe2f Mon Sep 17 00:00:00 2001 From: Brendan Abolivier Date: Mon, 5 Sep 2022 12:26:43 +0200 Subject: Share some metrics between the Prometheus exporter and the phone home stats (#13671) --- changelog.d/13671.misc | 1 + synapse/app/_base.py | 5 ++- synapse/app/phone_stats_home.py | 13 +++++- synapse/metrics/common_usage_metrics.py | 79 +++++++++++++++++++++++++++++++++ synapse/server.py | 6 +++ tests/test_phone_home.py | 46 ++++++++++++++++++- 6 files changed, 146 insertions(+), 4 deletions(-) create mode 100644 changelog.d/13671.misc create mode 100644 synapse/metrics/common_usage_metrics.py (limited to 'synapse') diff --git a/changelog.d/13671.misc b/changelog.d/13671.misc new file mode 100644 index 0000000000..f1c62b5b1e --- /dev/null +++ b/changelog.d/13671.misc @@ -0,0 +1 @@ +Introduce a `CommonUsageMetrics` class to share some usage metrics between the Prometheus exporter and the phone home stats. diff --git a/synapse/app/_base.py b/synapse/app/_base.py index 4742435d3b..9a24bed0a0 100644 --- a/synapse/app/_base.py +++ b/synapse/app/_base.py @@ -511,9 +511,10 @@ async def start(hs: "HomeServer") -> None: setup_sentry(hs) setup_sdnotify(hs) - # If background tasks are running on the main process, start collecting the - # phone home stats. + # If background tasks are running on the main process or this is the worker in + # charge of them, start collecting the phone home stats and shared usage metrics. if hs.config.worker.run_background_tasks: + await hs.get_common_usage_metrics_manager().setup() start_phone_stats_home(hs) # We now freeze all allocated objects in the hopes that (almost) diff --git a/synapse/app/phone_stats_home.py b/synapse/app/phone_stats_home.py index 40dbdace8e..51c8d15711 100644 --- a/synapse/app/phone_stats_home.py +++ b/synapse/app/phone_stats_home.py @@ -51,6 +51,16 @@ async def phone_stats_home( stats: JsonDict, stats_process: List[Tuple[int, "resource.struct_rusage"]] = _stats_process, ) -> None: + """Collect usage statistics and send them to the configured endpoint. + + Args: + hs: the HomeServer object to use for gathering usage data. + stats: the dict in which to store the statistics sent to the configured + endpoint. Mostly used in tests to figure out the data that is supposed to + be sent. + stats_process: statistics about resource usage of the process. + """ + logger.info("Gathering stats for reporting") now = int(hs.get_clock().time()) # Ensure the homeserver has started. 
@@ -83,6 +93,7 @@ async def phone_stats_home( # store = hs.get_datastores().main + common_metrics = await hs.get_common_usage_metrics_manager().get_metrics() stats["homeserver"] = hs.config.server.server_name stats["server_context"] = hs.config.server.server_context @@ -104,7 +115,7 @@ async def phone_stats_home( room_count = await store.get_room_count() stats["total_room_count"] = room_count - stats["daily_active_users"] = await store.count_daily_users() + stats["daily_active_users"] = common_metrics.daily_active_users stats["monthly_active_users"] = await store.count_monthly_users() daily_active_e2ee_rooms = await store.count_daily_active_e2ee_rooms() stats["daily_active_e2ee_rooms"] = daily_active_e2ee_rooms diff --git a/synapse/metrics/common_usage_metrics.py b/synapse/metrics/common_usage_metrics.py new file mode 100644 index 0000000000..0a22ea3d92 --- /dev/null +++ b/synapse/metrics/common_usage_metrics.py @@ -0,0 +1,79 @@ +# Copyright 2022 The Matrix.org Foundation C.I.C +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import TYPE_CHECKING + +import attr + +from synapse.metrics.background_process_metrics import run_as_background_process + +if TYPE_CHECKING: + from synapse.server import HomeServer + +from prometheus_client import Gauge + +# Gauge to expose daily active users metrics +current_dau_gauge = Gauge( + "synapse_admin_daily_active_users", + "Current daily active users count", +) + + +@attr.s(auto_attribs=True) +class CommonUsageMetrics: + """Usage metrics shared between the phone home stats and the prometheus exporter.""" + + daily_active_users: int + + +class CommonUsageMetricsManager: + """Collects common usage metrics.""" + + def __init__(self, hs: "HomeServer") -> None: + self._store = hs.get_datastores().main + self._clock = hs.get_clock() + + async def get_metrics(self) -> CommonUsageMetrics: + """Get the CommonUsageMetrics object. If no collection has happened yet, do it + before returning the metrics. + + Returns: + The CommonUsageMetrics object to read common metrics from. + """ + return await self._collect() + + async def setup(self) -> None: + """Keep the gauges for common usage metrics up to date.""" + await self._update_gauges() + self._clock.looping_call( + run_as_background_process, + 5 * 60 * 1000, + desc="common_usage_metrics_update_gauges", + func=self._update_gauges, + ) + + async def _collect(self) -> CommonUsageMetrics: + """Collect the common metrics and either create the CommonUsageMetrics object to + use if it doesn't exist yet, or update it. 
+ """ + dau_count = await self._store.count_daily_users() + + return CommonUsageMetrics( + daily_active_users=dau_count, + ) + + async def _update_gauges(self) -> None: + """Update the Prometheus gauges.""" + metrics = await self._collect() + + current_dau_gauge.set(float(metrics.daily_active_users)) diff --git a/synapse/server.py b/synapse/server.py index c2e55bf0b1..5a99c0b344 100644 --- a/synapse/server.py +++ b/synapse/server.py @@ -105,6 +105,7 @@ from synapse.handlers.typing import FollowerTypingHandler, TypingWriterHandler from synapse.handlers.user_directory import UserDirectoryHandler from synapse.http.client import InsecureInterceptableContextFactory, SimpleHttpClient from synapse.http.matrixfederationclient import MatrixFederationHttpClient +from synapse.metrics.common_usage_metrics import CommonUsageMetricsManager from synapse.module_api import ModuleApi from synapse.notifier import Notifier from synapse.push.bulk_push_rule_evaluator import BulkPushRuleEvaluator @@ -829,3 +830,8 @@ class HomeServer(metaclass=abc.ABCMeta): self.config.ratelimiting.rc_message, self.config.ratelimiting.rc_admin_redaction, ) + + @cache_in_self + def get_common_usage_metrics_manager(self) -> CommonUsageMetricsManager: + """Usage metrics shared between phone home stats and the prometheus exporter.""" + return CommonUsageMetricsManager(self) diff --git a/tests/test_phone_home.py b/tests/test_phone_home.py index b01cae6e5d..cc1a98f1c4 100644 --- a/tests/test_phone_home.py +++ b/tests/test_phone_home.py @@ -15,8 +15,14 @@ import resource from unittest import mock +from twisted.test.proto_helpers import MemoryReactor + from synapse.app.phone_stats_home import phone_stats_home +from synapse.rest import admin +from synapse.rest.client import login, sync +from synapse.server import HomeServer from synapse.types import JsonDict +from synapse.util import Clock from tests.unittest import HomeserverTestCase @@ -47,5 +53,43 @@ class PhoneHomeStatsTestCase(HomeserverTestCase): stats: JsonDict = {} self.reactor.advance(1) # `old_resource` has type `Mock` instead of `struct_rusage` - self.get_success(phone_stats_home(self.hs, stats, past_stats)) # type: ignore[arg-type] + self.get_success( + phone_stats_home(self.hs, stats, past_stats) # type: ignore[arg-type] + ) self.assertApproximates(stats["cpu_average"], 100, tolerance=2.5) + + +class CommonMetricsTestCase(HomeserverTestCase): + servlets = [ + admin.register_servlets, + login.register_servlets, + sync.register_servlets, + ] + + def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None: + self.metrics_manager = hs.get_common_usage_metrics_manager() + self.get_success(self.metrics_manager.setup()) + + def test_dau(self) -> None: + """Tests that the daily active users count is correctly updated.""" + self._assert_metric_value("daily_active_users", 0) + + self.register_user("user", "password") + tok = self.login("user", "password") + self.make_request("GET", "/sync", access_token=tok) + + self.pump(1) + + self._assert_metric_value("daily_active_users", 1) + + def _assert_metric_value(self, metric_name: str, expected: int) -> None: + """Compare the given value to the current value of the common usage metric with + the given name. + + Args: + metric_name: The metric to look up. + expected: Expected value for this metric. 
+ """ + metrics = self.get_success(self.metrics_manager.get_metrics()) + value = getattr(metrics, metric_name) + self.assertEqual(value, expected) -- cgit 1.5.1 From 8cb9261598becb3ed790b4268ff713f5e1122a21 Mon Sep 17 00:00:00 2001 From: David Robertson Date: Mon, 5 Sep 2022 12:13:13 +0100 Subject: Fix typechecking with latest `types-jsonschema` (#13712) --- changelog.d/13712.misc | 1 + synapse/api/filtering.py | 8 ++++---- 2 files changed, 5 insertions(+), 4 deletions(-) create mode 100644 changelog.d/13712.misc (limited to 'synapse') diff --git a/changelog.d/13712.misc b/changelog.d/13712.misc new file mode 100644 index 0000000000..2c4f6b19f6 --- /dev/null +++ b/changelog.d/13712.misc @@ -0,0 +1 @@ +Fix typechecking with latest types-jsonschema. diff --git a/synapse/api/filtering.py b/synapse/api/filtering.py index b007147519..102889ac49 100644 --- a/synapse/api/filtering.py +++ b/synapse/api/filtering.py @@ -140,13 +140,13 @@ USER_FILTER_SCHEMA = { @FormatChecker.cls_checks("matrix_room_id") -def matrix_room_id_validator(room_id_str: str) -> RoomID: - return RoomID.from_string(room_id_str) +def matrix_room_id_validator(room_id_str: str) -> bool: + return RoomID.is_valid(room_id_str) @FormatChecker.cls_checks("matrix_user_id") -def matrix_user_id_validator(user_id_str: str) -> UserID: - return UserID.from_string(user_id_str) +def matrix_user_id_validator(user_id_str: str) -> bool: + return UserID.is_valid(user_id_str) class Filtering: -- cgit 1.5.1 From 32fc3b7ba4702a0068a82bdd0595e2f426967d4d Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Tue, 6 Sep 2022 03:50:02 -0400 Subject: Remove configuration options for direct TCP replication. (#13647) Removes the ability to configure legacy direct TCP replication. Workers now require Redis to run. --- .github/workflows/tests.yml | 1 - changelog.d/13647.removal | 1 + docs/upgrade.md | 15 ++++++ docs/usage/configuration/config_documentation.md | 2 - docs/workers.md | 22 ++------- synapse/app/homeserver.py | 11 ----- synapse/config/server.py | 16 +++++-- synapse/config/workers.py | 8 ++-- synapse/replication/tcp/handler.py | 58 +++++++++--------------- tests/app/test_openid_listener.py | 4 +- tests/test_server.py | 2 +- tests/utils.py | 1 - 12 files changed, 63 insertions(+), 78 deletions(-) create mode 100644 changelog.d/13647.removal (limited to 'synapse') diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 3ce4ffb036..bc1de2893c 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -204,7 +204,6 @@ jobs: POSTGRES: ${{ matrix.job.postgres && 1}} MULTI_POSTGRES: ${{ (matrix.job.postgres == 'multi-postgres') && 1}} WORKERS: ${{ matrix.job.workers && 1 }} - REDIS: 1 BLACKLIST: ${{ matrix.job.workers && 'synapse-blacklist-with-workers' }} TOP: ${{ github.workspace }} diff --git a/changelog.d/13647.removal b/changelog.d/13647.removal new file mode 100644 index 0000000000..0190a65dba --- /dev/null +++ b/changelog.d/13647.removal @@ -0,0 +1 @@ +Remove the ability to use direct TCP replication with workers. Direct TCP replication was deprecated in Synapse v1.18.0. Workers now require using Redis. diff --git a/docs/upgrade.md b/docs/upgrade.md index 422a3da664..c6219d06e8 100644 --- a/docs/upgrade.md +++ b/docs/upgrade.md @@ -91,6 +91,21 @@ process, for example: # Upgrading to v1.67.0 +## Direct TCP replication is no longer supported: migrate to Redis + +Redis support was added in v1.13.0 with it becoming the recommended method in +v1.18.0. 
It replaced the old direct TCP connections (which was deprecated as of +v1.18.0) to the main process. With Redis, rather than all the workers connecting +to the main process, all the workers and the main process connect to Redis, +which relays replication commands between processes. This can give a significant +CPU saving on the main process and is a prerequisite for upcoming +performance improvements. + +To migrate to Redis add the [`redis` config](./workers.md#shared-configuration), +and remove the TCP `replication` listener from config of the master and +`worker_replication_port` from worker config. Note that a HTTP listener with a +`replication` resource is still required. + ## Minimum version of Poetry is now v1.2.0 The minimum supported version of poetry is now 1.2. This should only affect diff --git a/docs/usage/configuration/config_documentation.md b/docs/usage/configuration/config_documentation.md index 396c560822..757957a1d5 100644 --- a/docs/usage/configuration/config_documentation.md +++ b/docs/usage/configuration/config_documentation.md @@ -431,8 +431,6 @@ Sub-options for each listener include: * `metrics`: (see the docs [here](../../metrics-howto.md)), - * `replication`: (deprecated as of Synapse 1.18, see the docs [here](../../workers.md)). - * `tls`: set to true to enable TLS for this listener. Will use the TLS key/cert specified in tls_private_key_path / tls_certificate_path. * `x_forwarded`: Only valid for an 'http' listener. Set to true to use the X-Forwarded-For header as the client IP. Useful when Synapse is diff --git a/docs/workers.md b/docs/workers.md index 176bb1475e..40b1852313 100644 --- a/docs/workers.md +++ b/docs/workers.md @@ -32,13 +32,8 @@ stream between all configured Synapse processes. Additionally, processes may make HTTP requests to each other, primarily for operations which need to wait for a reply ─ such as sending an event. -Redis support was added in v1.13.0 with it becoming the recommended method in -v1.18.0. It replaced the old direct TCP connections (which is deprecated as of -v1.18.0) to the main process. With Redis, rather than all the workers connecting -to the main process, all the workers and the main process connect to Redis, -which relays replication commands between processes. This can give a significant -cpu saving on the main process and will be a prerequisite for upcoming -performance improvements. +All the workers and the main process connect to Redis, which relays replication +commands between processes. If Redis support is enabled Synapse will use it as a shared cache, as well as a pub/sub mechanism. @@ -330,7 +325,6 @@ effects of bursts of events from that bridge on events sent by normal users. Additionally, the writing of specific streams (such as events) can be moved off of the main process to a particular worker. -(This is only supported with Redis-based replication.) To enable this, the worker must have a HTTP replication listener configured, have a `worker_name` and be listed in the `instance_map` config. The same worker @@ -600,15 +594,9 @@ equivalent to `synapse.app.generic_worker`: ## Migration from old config -There are two main independent changes that have been made: introducing Redis -support and merging apps into `synapse.app.generic_worker`. Both these changes -are backwards compatible and so no changes to the config are required, however -server admins are encouraged to plan to migrate to Redis as the old style direct -TCP replication config is deprecated. 
- -To migrate to Redis add the `redis` config as above, and optionally remove the -TCP `replication` listener from master and `worker_replication_port` from worker -config. +A main change that has occurred is the merging of worker apps into +`synapse.app.generic_worker`. This change is backwards compatible and so no +changes to the config are required. To migrate apps to use `synapse.app.generic_worker` simply update the `worker_app` option in the worker configs, and where worker are started (e.g. diff --git a/synapse/app/homeserver.py b/synapse/app/homeserver.py index e57a926032..883f2fd2ec 100644 --- a/synapse/app/homeserver.py +++ b/synapse/app/homeserver.py @@ -57,7 +57,6 @@ from synapse.http.site import SynapseSite from synapse.logging.context import LoggingContext from synapse.metrics import METRICS_PREFIX, MetricsResource, RegistryProxy from synapse.replication.http import REPLICATION_PREFIX, ReplicationRestResource -from synapse.replication.tcp.resource import ReplicationStreamProtocolFactory from synapse.rest import ClientRestResource from synapse.rest.admin import AdminRestResource from synapse.rest.health import HealthResource @@ -290,16 +289,6 @@ class SynapseHomeServer(HomeServer): manhole_settings=self.config.server.manhole_settings, manhole_globals={"hs": self}, ) - elif listener.type == "replication": - services = listen_tcp( - listener.bind_addresses, - listener.port, - ReplicationStreamProtocolFactory(self), - ) - for s in services: - self.get_reactor().addSystemEventTrigger( - "before", "shutdown", s.stopListening - ) elif listener.type == "metrics": if not self.config.metrics.enable_metrics: logger.warning( diff --git a/synapse/config/server.py b/synapse/config/server.py index 085fe22c51..c91df636d9 100644 --- a/synapse/config/server.py +++ b/synapse/config/server.py @@ -36,6 +36,12 @@ from ._util import validate_config logger = logging.Logger(__name__) +DIRECT_TCP_ERROR = """ +Using direct TCP replication for workers is no longer supported. + +Please see https://matrix-org.github.io/synapse/latest/upgrade.html#direct-tcp-replication-is-no-longer-supported-migrate-to-redis +""" + # by default, we attempt to listen on both '::' *and* '0.0.0.0' because some OSes # (Windows, macOS, other BSD/Linux where net.ipv6.bindv6only is set) will only listen # on IPv6 when '::' is set. @@ -165,7 +171,6 @@ KNOWN_LISTENER_TYPES = { "http", "metrics", "manhole", - "replication", } KNOWN_RESOURCES = { @@ -515,7 +520,9 @@ class ServerConfig(Config): ): raise ConfigError("allowed_avatar_mimetypes must be a list") - self.listeners = [parse_listener_def(x) for x in config.get("listeners", [])] + self.listeners = [ + parse_listener_def(i, x) for i, x in enumerate(config.get("listeners", [])) + ] # no_tls is not really supported any more, but let's grandfather it in # here. @@ -880,9 +887,12 @@ def read_gc_thresholds( ) -def parse_listener_def(listener: Any) -> ListenerConfig: +def parse_listener_def(num: int, listener: Any) -> ListenerConfig: """parse a listener config from the config file""" listener_type = listener["type"] + # Raise a helpful error if direct TCP replication is still configured. 
+ if listener_type == "replication": + raise ConfigError(DIRECT_TCP_ERROR, ("listeners", str(num), "type")) port = listener.get("port") if not isinstance(port, int): diff --git a/synapse/config/workers.py b/synapse/config/workers.py index f2716422b5..0fb725dd8f 100644 --- a/synapse/config/workers.py +++ b/synapse/config/workers.py @@ -27,7 +27,7 @@ from ._base import ( RoutableShardedWorkerHandlingConfig, ShardedWorkerHandlingConfig, ) -from .server import ListenerConfig, parse_listener_def +from .server import DIRECT_TCP_ERROR, ListenerConfig, parse_listener_def _FEDERATION_SENDER_WITH_SEND_FEDERATION_ENABLED_ERROR = """ The send_federation config option must be disabled in the main @@ -128,7 +128,8 @@ class WorkerConfig(Config): self.worker_app = None self.worker_listeners = [ - parse_listener_def(x) for x in config.get("worker_listeners", []) + parse_listener_def(i, x) + for i, x in enumerate(config.get("worker_listeners", [])) ] self.worker_daemonize = bool(config.get("worker_daemonize")) self.worker_pid_file = config.get("worker_pid_file") @@ -142,7 +143,8 @@ class WorkerConfig(Config): self.worker_replication_host = config.get("worker_replication_host", None) # The port on the main synapse for TCP replication - self.worker_replication_port = config.get("worker_replication_port", None) + if "worker_replication_port" in config: + raise ConfigError(DIRECT_TCP_ERROR, ("worker_replication_port",)) # The port on the main synapse for HTTP replication endpoint self.worker_replication_http_port = config.get("worker_replication_http_port") diff --git a/synapse/replication/tcp/handler.py b/synapse/replication/tcp/handler.py index e1cbfa50eb..0f166d16aa 100644 --- a/synapse/replication/tcp/handler.py +++ b/synapse/replication/tcp/handler.py @@ -35,7 +35,6 @@ from twisted.internet.protocol import ReconnectingClientFactory from synapse.metrics import LaterGauge from synapse.metrics.background_process_metrics import run_as_background_process -from synapse.replication.tcp.client import DirectTcpReplicationClientFactory from synapse.replication.tcp.commands import ( ClearUserSyncsCommand, Command, @@ -332,46 +331,31 @@ class ReplicationCommandHandler: def start_replication(self, hs: "HomeServer") -> None: """Helper method to start replication.""" - if hs.config.redis.redis_enabled: - from synapse.replication.tcp.redis import ( - RedisDirectTcpReplicationClientFactory, - ) + from synapse.replication.tcp.redis import RedisDirectTcpReplicationClientFactory - # First let's ensure that we have a ReplicationStreamer started. - hs.get_replication_streamer() + # First let's ensure that we have a ReplicationStreamer started. + hs.get_replication_streamer() - # We need two connections to redis, one for the subscription stream and - # one to send commands to (as you can't send further redis commands to a - # connection after SUBSCRIBE is called). + # We need two connections to redis, one for the subscription stream and + # one to send commands to (as you can't send further redis commands to a + # connection after SUBSCRIBE is called). - # First create the connection for sending commands. - outbound_redis_connection = hs.get_outbound_redis_connection() + # First create the connection for sending commands. + outbound_redis_connection = hs.get_outbound_redis_connection() - # Now create the factory/connection for the subscription stream. 
- self._factory = RedisDirectTcpReplicationClientFactory( - hs, - outbound_redis_connection, - channel_names=self._channels_to_subscribe_to, - ) - hs.get_reactor().connectTCP( - hs.config.redis.redis_host, - hs.config.redis.redis_port, - self._factory, - timeout=30, - bindAddress=None, - ) - else: - client_name = hs.get_instance_name() - self._factory = DirectTcpReplicationClientFactory(hs, client_name, self) - host = hs.config.worker.worker_replication_host - port = hs.config.worker.worker_replication_port - hs.get_reactor().connectTCP( - host, - port, - self._factory, - timeout=30, - bindAddress=None, - ) + # Now create the factory/connection for the subscription stream. + self._factory = RedisDirectTcpReplicationClientFactory( + hs, + outbound_redis_connection, + channel_names=self._channels_to_subscribe_to, + ) + hs.get_reactor().connectTCP( + hs.config.redis.redis_host, + hs.config.redis.redis_port, + self._factory, + timeout=30, + bindAddress=None, + ) def get_streams(self) -> Dict[str, Stream]: """Get a map from stream name to all streams.""" diff --git a/tests/app/test_openid_listener.py b/tests/app/test_openid_listener.py index 264e101082..c7dae58eb5 100644 --- a/tests/app/test_openid_listener.py +++ b/tests/app/test_openid_listener.py @@ -61,7 +61,7 @@ class FederationReaderOpenIDListenerTests(HomeserverTestCase): } # Listen with the config - self.hs._listen_http(parse_listener_def(config)) + self.hs._listen_http(parse_listener_def(0, config)) # Grab the resource from the site that was told to listen site = self.reactor.tcpServers[0][1] @@ -109,7 +109,7 @@ class SynapseHomeserverOpenIDListenerTests(HomeserverTestCase): } # Listen with the config - self.hs._listener_http(self.hs.config, parse_listener_def(config)) + self.hs._listener_http(self.hs.config, parse_listener_def(0, config)) # Grab the resource from the site that was told to listen site = self.reactor.tcpServers[0][1] diff --git a/tests/test_server.py b/tests/test_server.py index 23975d59c3..7c66448245 100644 --- a/tests/test_server.py +++ b/tests/test_server.py @@ -228,7 +228,7 @@ class OptionsResourceTests(unittest.TestCase): site = SynapseSite( "test", "site_tag", - parse_listener_def({"type": "http", "port": 0}), + parse_listener_def(0, {"type": "http", "port": 0}), self.resource, "1.0", max_request_body_size=4096, diff --git a/tests/utils.py b/tests/utils.py index d2c6d1e852..65db437697 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -135,7 +135,6 @@ def default_config( "enable_registration_captcha": False, "macaroon_secret_key": "not even a little secret", "password_providers": [], - "worker_replication_url": "", "worker_app": None, "block_non_admin_invites": False, "federation_domain_whitelist": None, -- cgit 1.5.1 From b455c2a5ec4874a6897c0448631d8ab8f36f9339 Mon Sep 17 00:00:00 2001 From: reivilibre Date: Tue, 6 Sep 2022 11:21:21 +0000 Subject: Update Grafana dashboard to not use legacy metric names. (#13714) --- changelog.d/13714.misc | 1 + contrib/grafana/synapse.json | 138 +++++++++++++++++----------------- synapse/metrics/_legacy_exposition.py | 4 +- synapse/util/caches/__init__.py | 4 +- 4 files changed, 74 insertions(+), 73 deletions(-) create mode 100644 changelog.d/13714.misc (limited to 'synapse') diff --git a/changelog.d/13714.misc b/changelog.d/13714.misc new file mode 100644 index 0000000000..07ace50b12 --- /dev/null +++ b/changelog.d/13714.misc @@ -0,0 +1 @@ +Add experimental configuration option to allow disabling legacy Prometheus metric names. 
\ No newline at end of file diff --git a/contrib/grafana/synapse.json b/contrib/grafana/synapse.json index 248cd6d9ad..58061e2fce 100644 --- a/contrib/grafana/synapse.json +++ b/contrib/grafana/synapse.json @@ -335,7 +335,7 @@ "datasource": { "uid": "$datasource" }, - "expr": "sum(rate(synapse_storage_events_persisted_events{instance=\"$instance\"}[$bucket_size]))", + "expr": "sum(rate(synapse_storage_events_persisted_events_total{instance=\"$instance\"}[$bucket_size]))", "hide": false, "instant": false, "legendFormat": "Events", @@ -1423,7 +1423,7 @@ "datasource": { "uid": "$datasource" }, - "expr": "rate(synapse_background_process_ru_utime_seconds{instance=\"$instance\",job=~\"$job\",index=~\"$index\"}[$bucket_size])+rate(synapse_background_process_ru_stime_seconds{instance=\"$instance\",job=~\"$job\",index=~\"$index\"}[$bucket_size])", + "expr": "rate(synapse_background_process_ru_utime_seconds_total{instance=\"$instance\",job=~\"$job\",index=~\"$index\"}[$bucket_size])+rate(synapse_background_process_ru_stime_seconds_total{instance=\"$instance\",job=~\"$job\",index=~\"$index\"}[$bucket_size])", "format": "time_series", "hide": false, "instant": false, @@ -1804,7 +1804,7 @@ "datasource": { "uid": "$datasource" }, - "expr": "sum(rate(synapse_storage_events_persisted_events{instance=\"$instance\"}[$bucket_size])) without (job,index)", + "expr": "sum(rate(synapse_storage_events_persisted_events_total{instance=\"$instance\"}[$bucket_size])) without (job,index)", "format": "time_series", "interval": "", "intervalFactor": 2, @@ -2437,7 +2437,7 @@ "uid": "$datasource" }, "exemplar": false, - "expr": "sum(rate(synapse_state_res_db_for_biggest_room_seconds{instance=\"$instance\"}[1m]))", + "expr": "sum(rate(synapse_state_res_db_for_biggest_room_seconds_total{instance=\"$instance\"}[1m]))", "format": "time_series", "hide": false, "instant": false, @@ -2451,7 +2451,7 @@ "uid": "$datasource" }, "exemplar": false, - "expr": "sum(rate(synapse_state_res_cpu_for_biggest_room_seconds{instance=\"$instance\"}[1m]))", + "expr": "sum(rate(synapse_state_res_cpu_for_biggest_room_seconds_total{instance=\"$instance\"}[1m]))", "format": "time_series", "hide": false, "instant": false, @@ -3425,7 +3425,7 @@ "datasource": { "uid": "$datasource" }, - "expr": "rate(synapse_background_process_ru_utime_seconds{instance=\"$instance\",job=~\"$job\",index=~\"$index\"}[$bucket_size])+rate(synapse_background_process_ru_stime_seconds{instance=\"$instance\",job=~\"$job\",index=~\"$index\"}[$bucket_size])", + "expr": "rate(synapse_background_process_ru_utime_seconds_total{instance=\"$instance\",job=~\"$job\",index=~\"$index\"}[$bucket_size])+rate(synapse_background_process_ru_stime_seconds_total{instance=\"$instance\",job=~\"$job\",index=~\"$index\"}[$bucket_size])", "format": "time_series", "interval": "", "intervalFactor": 1, @@ -3518,7 +3518,7 @@ "datasource": { "uid": "$datasource" }, - "expr": "rate(synapse_background_process_db_txn_duration_seconds{instance=\"$instance\",job=~\"$job\",index=~\"$index\"}[$bucket_size]) + rate(synapse_background_process_db_sched_duration_seconds{instance=\"$instance\",job=~\"$job\",index=~\"$index\"}[$bucket_size])", + "expr": "rate(synapse_background_process_db_txn_duration_seconds_total{instance=\"$instance\",job=~\"$job\",index=~\"$index\"}[$bucket_size]) + rate(synapse_background_process_db_sched_duration_seconds_total{instance=\"$instance\",job=~\"$job\",index=~\"$index\"}[$bucket_size])", "format": "time_series", "hide": false, "intervalFactor": 1, @@ -3726,7 +3726,7 @@ "datasource": 
{ "uid": "$datasource" }, - "expr": "sum(rate(synapse_federation_client_sent_transactions{instance=\"$instance\"}[$bucket_size]))", + "expr": "sum(rate(synapse_federation_client_sent_transactions_total{instance=\"$instance\"}[$bucket_size]))", "format": "time_series", "intervalFactor": 1, "legendFormat": "successful txn rate", @@ -3736,7 +3736,7 @@ "datasource": { "uid": "$datasource" }, - "expr": "sum(rate(synapse_util_metrics_block_count{block_name=\"_send_new_transaction\",instance=\"$instance\"}[$bucket_size]) - ignoring (block_name) rate(synapse_federation_client_sent_transactions{instance=\"$instance\"}[$bucket_size]))", + "expr": "sum(rate(synapse_util_metrics_block_count_total{block_name=\"_send_new_transaction\",instance=\"$instance\"}[$bucket_size]) - ignoring (block_name) rate(synapse_federation_client_sent_transactions_total{instance=\"$instance\"}[$bucket_size]))", "legendFormat": "failed txn rate", "refId": "B" } @@ -3826,7 +3826,7 @@ "datasource": { "uid": "$datasource" }, - "expr": "sum(rate(synapse_federation_server_received_pdus{instance=~\"$instance\"}[$bucket_size]))", + "expr": "sum(rate(synapse_federation_server_received_pdus_total{instance=~\"$instance\"}[$bucket_size]))", "format": "time_series", "intervalFactor": 1, "legendFormat": "pdus", @@ -3836,7 +3836,7 @@ "datasource": { "uid": "$datasource" }, - "expr": "sum(rate(synapse_federation_server_received_edus{instance=~\"$instance\"}[$bucket_size]))", + "expr": "sum(rate(synapse_federation_server_received_edus_total{instance=~\"$instance\"}[$bucket_size]))", "format": "time_series", "intervalFactor": 1, "legendFormat": "edus", @@ -3928,7 +3928,7 @@ "datasource": { "uid": "$datasource" }, - "expr": "sum(rate(synapse_federation_client_sent_pdu_destinations:total{instance=\"$instance\"}[$bucket_size]))", + "expr": "sum(rate(synapse_federation_client_sent_pdu_destinations:total_total{instance=\"$instance\"}[$bucket_size]))", "format": "time_series", "interval": "", "intervalFactor": 1, @@ -3939,7 +3939,7 @@ "datasource": { "uid": "$datasource" }, - "expr": "sum(rate(synapse_federation_client_sent_edus{instance=\"$instance\"}[$bucket_size]))", + "expr": "sum(rate(synapse_federation_client_sent_edus_total{instance=\"$instance\"}[$bucket_size]))", "format": "time_series", "intervalFactor": 1, "legendFormat": "edus", @@ -5042,7 +5042,7 @@ "datasource": { "uid": "$datasource" }, - "expr": "rate(synapse_http_httppusher_http_pushes_processed{instance=\"$instance\",job=~\"$job\",index=~\"$index\"}[$bucket_size]) and on (instance, job, index) (synapse_http_httppusher_http_pushes_failed + synapse_http_httppusher_http_pushes_processed) > 0", + "expr": "rate(synapse_http_httppusher_http_pushes_processed_total{instance=\"$instance\",job=~\"$job\",index=~\"$index\"}[$bucket_size]) and on (instance, job, index) (synapse_http_httppusher_http_pushes_failed_total + synapse_http_httppusher_http_pushes_processed_total) > 0", "format": "time_series", "interval": "", "intervalFactor": 2, @@ -5054,7 +5054,7 @@ "datasource": { "uid": "$datasource" }, - "expr": "rate(synapse_http_httppusher_http_pushes_failed{instance=\"$instance\",job=~\"$job\",index=~\"$index\"}[$bucket_size]) and on (instance, job, index) (synapse_http_httppusher_http_pushes_failed + synapse_http_httppusher_http_pushes_processed) > 0", + "expr": "rate(synapse_http_httppusher_http_pushes_failed_total{instance=\"$instance\",job=~\"$job\",index=~\"$index\"}[$bucket_size]) and on (instance, job, index) (synapse_http_httppusher_http_pushes_failed_total + 
synapse_http_httppusher_http_pushes_processed_total) > 0", "format": "time_series", "intervalFactor": 2, "legendFormat": "failed {{job}}", @@ -5268,12 +5268,12 @@ "uid": "${DS_PROMETHEUS}" }, "exemplar": true, - "expr": "sum(rate(synapse_push_bulk_push_rule_evaluator_push_rules_state_size_counter{job=\"$job\",index=~\"$index\",instance=\"$instance\"}[$bucket_size]))", + "expr": "sum(rate(synapse_push_bulk_push_rule_evaluator_push_rules_state_size_counter_total{job=\"$job\",index=~\"$index\",instance=\"$instance\"}[$bucket_size]))", "format": "time_series", "interval": "", "intervalFactor": 2, "legendFormat": "{{index}}", - "metric": "synapse_push_bulk_push_rule_evaluator_push_rules_state_size_counter", + "metric": "synapse_push_bulk_push_rule_evaluator_push_rules_state_size_counter_total", "refId": "A", "step": 2 } @@ -5369,12 +5369,12 @@ "uid": "$datasource" }, "exemplar": true, - "expr": "sum(rate(synapse_push_bulk_push_rule_evaluator_push_rules_invalidation_counter{job=\"$job\",index=~\"$index\",instance=\"$instance\"}[$bucket_size]))", + "expr": "sum(rate(synapse_push_bulk_push_rule_evaluator_push_rules_invalidation_counter_total{job=\"$job\",index=~\"$index\",instance=\"$instance\"}[$bucket_size]))", "format": "time_series", "interval": "", "intervalFactor": 2, "legendFormat": "{{index}}", - "metric": "synapse_push_bulk_push_rule_evaluator_push_rules_invalidation_counter", + "metric": "synapse_push_bulk_push_rule_evaluator_push_rules_invalidation_counter_total", "refId": "A", "step": 2 } @@ -5475,12 +5475,12 @@ "uid": "${DS_PROMETHEUS}" }, "exemplar": true, - "expr": "sum(rate(synapse_util_caches_cache:hits{job=\"$job\",index=~\"$index\",name=\"push_rules_delta_state_cache_metric\",instance=\"$instance\"}[$bucket_size]))/sum(rate(synapse_util_caches_cache:total{job=\"$job\",index=~\"$index\", name=\"push_rules_delta_state_cache_metric\",instance=\"$instance\"}[$bucket_size]))", + "expr": "sum(rate(synapse_util_caches_cache_hits{job=\"$job\",index=~\"$index\",name=\"push_rules_delta_state_cache_metric\",instance=\"$instance\"}[$bucket_size]))/sum(rate(synapse_util_caches_cache{job=\"$job\",index=~\"$index\", name=\"push_rules_delta_state_cache_metric\",instance=\"$instance\"}[$bucket_size]))", "format": "time_series", "interval": "", "intervalFactor": 2, "legendFormat": "Hit Rate", - "metric": "synapse_push_bulk_push_rule_evaluator_push_rules_invalidation_counter", + "metric": "synapse_push_bulk_push_rule_evaluator_push_rules_invalidation_counter_total", "refId": "A", "step": 2 }, @@ -5490,7 +5490,7 @@ "uid": "${DS_PROMETHEUS}" }, "exemplar": true, - "expr": "sum(rate(synapse_util_caches_cache:total{job=\"$job\",index=~\"$index\", name=\"push_rules_delta_state_cache_metric\",instance=\"$instance\"}[$bucket_size]))", + "expr": "sum(rate(synapse_util_caches_cache{job=\"$job\",index=~\"$index\", name=\"push_rules_delta_state_cache_metric\",instance=\"$instance\"}[$bucket_size]))", "format": "time_series", "interval": "", "intervalFactor": 2, @@ -5598,12 +5598,12 @@ "uid": "${DS_PROMETHEUS}" }, "exemplar": true, - "expr": "sum(rate(synapse_util_caches_cache:hits{job=\"$job\",index=~\"$index\",name=\"room_push_rule_cache\",instance=\"$instance\"}[$bucket_size]))/sum(rate(synapse_util_caches_cache:total{job=\"$job\",index=~\"$index\", name=\"room_push_rule_cache\",instance=\"$instance\"}[$bucket_size]))", + "expr": 
"sum(rate(synapse_util_caches_cache_hits{job=\"$job\",index=~\"$index\",name=\"room_push_rule_cache\",instance=\"$instance\"}[$bucket_size]))/sum(rate(synapse_util_caches_cache{job=\"$job\",index=~\"$index\", name=\"room_push_rule_cache\",instance=\"$instance\"}[$bucket_size]))", "format": "time_series", "interval": "", "intervalFactor": 2, "legendFormat": "Hit Rate", - "metric": "synapse_push_bulk_push_rule_evaluator_push_rules_invalidation_counter", + "metric": "synapse_push_bulk_push_rule_evaluator_push_rules_invalidation_counter_total", "refId": "A", "step": 2 }, @@ -5613,7 +5613,7 @@ "uid": "${DS_PROMETHEUS}" }, "exemplar": true, - "expr": "sum(rate(synapse_util_caches_cache:total{job=\"$job\",index=~\"$index\", name=\"room_push_rule_cache\",instance=\"$instance\"}[$bucket_size]))", + "expr": "sum(rate(synapse_util_caches_cache{job=\"$job\",index=~\"$index\", name=\"room_push_rule_cache\",instance=\"$instance\"}[$bucket_size]))", "format": "time_series", "interval": "", "intervalFactor": 2, @@ -5719,12 +5719,12 @@ "uid": "${DS_PROMETHEUS}" }, "exemplar": true, - "expr": "sum(rate(synapse_util_caches_cache:hits{job=\"$job\",index=~\"$index\",name=\"_get_rules_for_room\",instance=\"$instance\"}[$bucket_size]))/sum(rate(synapse_util_caches_cache:total{job=\"$job\",index=~\"$index\", name=\"_get_rules_for_room\",instance=\"$instance\"}[$bucket_size]))", + "expr": "sum(rate(synapse_util_caches_cache_hits{job=\"$job\",index=~\"$index\",name=\"_get_rules_for_room\",instance=\"$instance\"}[$bucket_size]))/sum(rate(synapse_util_caches_cache{job=\"$job\",index=~\"$index\", name=\"_get_rules_for_room\",instance=\"$instance\"}[$bucket_size]))", "format": "time_series", "interval": "", "intervalFactor": 2, "legendFormat": "Hit Rate", - "metric": "synapse_push_bulk_push_rule_evaluator_push_rules_invalidation_counter", + "metric": "synapse_push_bulk_push_rule_evaluator_push_rules_invalidation_counter_total", "refId": "A", "step": 2 }, @@ -5734,7 +5734,7 @@ "uid": "${DS_PROMETHEUS}" }, "exemplar": true, - "expr": "sum(rate(synapse_util_caches_cache:total{job=\"$job\",index=~\"$index\", name=\"_get_rules_for_room\",instance=\"$instance\"}[$bucket_size]))", + "expr": "sum(rate(synapse_util_caches_cache{job=\"$job\",index=~\"$index\", name=\"_get_rules_for_room\",instance=\"$instance\"}[$bucket_size]))", "format": "time_series", "interval": "", "intervalFactor": 2, @@ -6087,7 +6087,7 @@ "datasource": { "uid": "$datasource" }, - "expr": "topk(10, rate(synapse_storage_transaction_time_count{instance=\"$instance\",job=~\"$job\",index=~\"$index\"}[$bucket_size]))", + "expr": "topk(10, rate(synapse_storage_transaction_time_count_total{instance=\"$instance\",job=~\"$job\",index=~\"$index\"}[$bucket_size]))", "format": "time_series", "interval": "", "intervalFactor": 2, @@ -6187,7 +6187,7 @@ "datasource": { "uid": "$datasource" }, - "expr": "rate(synapse_storage_transaction_time_sum{instance=\"$instance\",job=~\"$job\",index=~\"$index\"}[$bucket_size])", + "expr": "rate(synapse_storage_transaction_time_sum_total{instance=\"$instance\",job=~\"$job\",index=~\"$index\"}[$bucket_size])", "format": "time_series", "instant": false, "interval": "", @@ -6287,7 +6287,7 @@ "datasource": { "uid": "$datasource" }, - "expr": "rate(synapse_storage_transaction_time_sum{instance=\"$instance\",job=~\"$job\",index=~\"$index\"}[$bucket_size])/rate(synapse_storage_transaction_time_count{instance=\"$instance\",job=~\"$job\",index=~\"$index\"}[$bucket_size])", + "expr": 
"rate(synapse_storage_transaction_time_sum_total{instance=\"$instance\",job=~\"$job\",index=~\"$index\"}[$bucket_size])/rate(synapse_storage_transaction_time_count_total{instance=\"$instance\",job=~\"$job\",index=~\"$index\"}[$bucket_size])", "format": "time_series", "instant": false, "interval": "", @@ -6538,7 +6538,7 @@ "datasource": { "uid": "$datasource" }, - "expr": "rate(synapse_util_metrics_block_ru_utime_seconds{instance=\"$instance\",job=~\"$job\",index=~\"$index\",block_name!=\"wrapped_request_handler\"}[$bucket_size]) + rate(synapse_util_metrics_block_ru_stime_seconds[$bucket_size])", + "expr": "rate(synapse_util_metrics_block_ru_utime_seconds_total{instance=\"$instance\",job=~\"$job\",index=~\"$index\",block_name!=\"wrapped_request_handler\"}[$bucket_size]) + rate(synapse_util_metrics_block_ru_stime_seconds_total[$bucket_size])", "format": "time_series", "interval": "", "intervalFactor": 2, @@ -6636,7 +6636,7 @@ "datasource": { "uid": "$datasource" }, - "expr": "(rate(synapse_util_metrics_block_ru_utime_seconds{instance=\"$instance\",job=~\"$job\",index=~\"$index\"}[$bucket_size]) + rate(synapse_util_metrics_block_ru_stime_seconds[$bucket_size])) / rate(synapse_util_metrics_block_count[$bucket_size])", + "expr": "(rate(synapse_util_metrics_block_ru_utime_seconds_total{instance=\"$instance\",job=~\"$job\",index=~\"$index\"}[$bucket_size]) + rate(synapse_util_metrics_block_ru_stime_seconds_total[$bucket_size])) / rate(synapse_util_metrics_block_count_total[$bucket_size])", "format": "time_series", "interval": "", "intervalFactor": 2, @@ -6737,7 +6737,7 @@ "uid": "${DS_PROMETHEUS}" }, "exemplar": true, - "expr": "rate(synapse_util_metrics_block_db_txn_duration_seconds{instance=\"$instance\",job=~\"$job\",index=~\"$index\"}[$bucket_size])", + "expr": "rate(synapse_util_metrics_block_db_txn_duration_seconds_total{instance=\"$instance\",job=~\"$job\",index=~\"$index\"}[$bucket_size])", "format": "time_series", "interval": "", "intervalFactor": 2, @@ -6839,7 +6839,7 @@ "datasource": { "uid": "$datasource" }, - "expr": "rate(synapse_util_metrics_block_db_txn_duration_seconds{instance=\"$instance\",job=~\"$job\",index=~\"$index\"}[$bucket_size]) / rate(synapse_util_metrics_block_db_txn_count{instance=\"$instance\",job=~\"$job\",index=~\"$index\"}[$bucket_size])", + "expr": "rate(synapse_util_metrics_block_db_txn_duration_seconds_total{instance=\"$instance\",job=~\"$job\",index=~\"$index\"}[$bucket_size]) / rate(synapse_util_metrics_block_db_txn_count_total{instance=\"$instance\",job=~\"$job\",index=~\"$index\"}[$bucket_size])", "format": "time_series", "interval": "", "intervalFactor": 2, @@ -6936,7 +6936,7 @@ "datasource": { "uid": "$datasource" }, - "expr": "rate(synapse_util_metrics_block_db_txn_duration_seconds{instance=\"$instance\",job=~\"$job\",index=~\"$index\"}[$bucket_size]) / rate(synapse_util_metrics_block_db_txn_count{instance=\"$instance\",job=~\"$job\",index=~\"$index\"}[$bucket_size])", + "expr": "rate(synapse_util_metrics_block_db_txn_duration_seconds_total{instance=\"$instance\",job=~\"$job\",index=~\"$index\"}[$bucket_size]) / rate(synapse_util_metrics_block_db_txn_count_total{instance=\"$instance\",job=~\"$job\",index=~\"$index\"}[$bucket_size])", "format": "time_series", "interval": "", "intervalFactor": 2, @@ -7033,7 +7033,7 @@ "datasource": { "uid": "$datasource" }, - "expr": "rate(synapse_util_metrics_block_time_seconds{instance=\"$instance\",job=~\"$job\",index=~\"$index\"}[$bucket_size]) / 
rate(synapse_util_metrics_block_count{instance=\"$instance\",job=~\"$job\",index=~\"$index\"}[$bucket_size])", + "expr": "rate(synapse_util_metrics_block_time_seconds_total{instance=\"$instance\",job=~\"$job\",index=~\"$index\"}[$bucket_size]) / rate(synapse_util_metrics_block_count_total{instance=\"$instance\",job=~\"$job\",index=~\"$index\"}[$bucket_size])", "format": "time_series", "interval": "", "intervalFactor": 2, @@ -7122,7 +7122,7 @@ "datasource": { "uid": "$datasource" }, - "expr": "rate(synapse_util_metrics_block_count{instance=\"$instance\",job=~\"$job\",index=~\"$index\"}[$bucket_size])", + "expr": "rate(synapse_util_metrics_block_count_total{instance=\"$instance\",job=~\"$job\",index=~\"$index\"}[$bucket_size])", "interval": "", "legendFormat": "{{job}}-{{index}} {{block_name}}", "refId": "A" @@ -7246,7 +7246,7 @@ "datasource": { "uid": "$datasource" }, - "expr": "rate(synapse_util_caches_cache:hits{job=~\"$job\",index=~\"$index\",instance=\"$instance\"}[$bucket_size])/rate(synapse_util_caches_cache:total{job=~\"$job\",index=~\"$index\",instance=\"$instance\"}[$bucket_size])", + "expr": "rate(synapse_util_caches_cache_hits{job=~\"$job\",index=~\"$index\",instance=\"$instance\"}[$bucket_size])/rate(synapse_util_caches_cache{job=~\"$job\",index=~\"$index\",instance=\"$instance\"}[$bucket_size])", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{name}} {{job}}-{{index}}", @@ -7347,7 +7347,7 @@ "datasource": { "uid": "$datasource" }, - "expr": "synapse_util_caches_cache:size{instance=\"$instance\",job=~\"$job\",index=~\"$index\"}", + "expr": "synapse_util_caches_cache_size{instance=\"$instance\",job=~\"$job\",index=~\"$index\"}", "format": "time_series", "hide": false, "interval": "", @@ -7447,7 +7447,7 @@ "datasource": { "uid": "$datasource" }, - "expr": "rate(synapse_util_caches_cache:total{job=~\"$job\",index=~\"$index\",instance=\"$instance\"}[$bucket_size])", + "expr": "rate(synapse_util_caches_cache{job=~\"$job\",index=~\"$index\",instance=\"$instance\"}[$bucket_size])", "format": "time_series", "interval": "", "intervalFactor": 2, @@ -7547,7 +7547,7 @@ "datasource": { "uid": "$datasource" }, - "expr": "topk(10, rate(synapse_util_caches_cache:total{job=~\"$job\",index=~\"$index\",instance=\"$instance\"}[$bucket_size]) - rate(synapse_util_caches_cache:hits{job=~\"$job\",index=~\"$index\",instance=\"$instance\"}[$bucket_size]))", + "expr": "topk(10, rate(synapse_util_caches_cache{job=~\"$job\",index=~\"$index\",instance=\"$instance\"}[$bucket_size]) - rate(synapse_util_caches_cache_hits{job=~\"$job\",index=~\"$index\",instance=\"$instance\"}[$bucket_size]))", "format": "time_series", "interval": "", "intervalFactor": 2, @@ -7643,7 +7643,7 @@ "datasource": { "uid": "$datasource" }, - "expr": "rate(synapse_util_caches_cache:evicted_size{instance=\"$instance\",job=~\"$job\",index=~\"$index\"}[$bucket_size])", + "expr": "rate(synapse_util_caches_cache_evicted_size{instance=\"$instance\",job=~\"$job\",index=~\"$index\"}[$bucket_size])", "format": "time_series", "interval": "", "intervalFactor": 1, @@ -7763,7 +7763,7 @@ "datasource": { "uid": "$datasource" }, - "expr": "synapse_util_caches_response_cache:size{instance=\"$instance\",job=~\"$job\",index=~\"$index\"}", + "expr": "synapse_util_caches_response_cache_size{instance=\"$instance\",job=~\"$job\",index=~\"$index\"}", "interval": "", "legendFormat": "{{name}} {{job}}-{{index}}", "refId": "A" @@ -7853,7 +7853,7 @@ "datasource": { "uid": "$datasource" }, - "expr": 
"rate(synapse_util_caches_response_cache:hits{instance=\"$instance\", job=~\"$job\", index=~\"$index\"}[$bucket_size])/rate(synapse_util_caches_response_cache:total{instance=\"$instance\", job=~\"$job\", index=~\"$index\"}[$bucket_size])", + "expr": "rate(synapse_util_caches_response_cache_hits{instance=\"$instance\", job=~\"$job\", index=~\"$index\"}[$bucket_size])/rate(synapse_util_caches_response_cache{instance=\"$instance\", job=~\"$job\", index=~\"$index\"}[$bucket_size])", "interval": "", "legendFormat": "{{name}} {{job}}-{{index}}", "refId": "A" @@ -9556,7 +9556,7 @@ "datasource": { "uid": "$datasource" }, - "expr": "synapse_forward_extremities_bucket{instance=\"$instance\"} and on (index, instance, job) (synapse_storage_events_persisted_events > 0)", + "expr": "synapse_forward_extremities_bucket{instance=\"$instance\"} and on (index, instance, job) (synapse_storage_events_persisted_events_total > 0)", "format": "heatmap", "intervalFactor": 1, "legendFormat": "{{le}}", @@ -9716,7 +9716,7 @@ "datasource": { "uid": "$datasource" }, - "expr": "rate(synapse_storage_events_forward_extremities_persisted_bucket{instance=\"$instance\"}[$bucket_size]) and on (index, instance, job) (synapse_storage_events_persisted_events > 0)", + "expr": "rate(synapse_storage_events_forward_extremities_persisted_bucket{instance=\"$instance\"}[$bucket_size]) and on (index, instance, job) (synapse_storage_events_persisted_events_total > 0)", "format": "heatmap", "intervalFactor": 1, "legendFormat": "{{le}}", @@ -9793,7 +9793,7 @@ "datasource": { "uid": "$datasource" }, - "expr": "histogram_quantile(0.5, rate(synapse_storage_events_forward_extremities_persisted_bucket{instance=\"$instance\"}[$bucket_size]) and on (index, instance, job) (synapse_storage_events_persisted_events > 0))", + "expr": "histogram_quantile(0.5, rate(synapse_storage_events_forward_extremities_persisted_bucket{instance=\"$instance\"}[$bucket_size]) and on (index, instance, job) (synapse_storage_events_persisted_events_total > 0))", "format": "time_series", "intervalFactor": 1, "legendFormat": "50%", @@ -9803,7 +9803,7 @@ "datasource": { "uid": "$datasource" }, - "expr": "histogram_quantile(0.75, rate(synapse_storage_events_forward_extremities_persisted_bucket{instance=\"$instance\"}[$bucket_size]) and on (index, instance, job) (synapse_storage_events_persisted_events > 0))", + "expr": "histogram_quantile(0.75, rate(synapse_storage_events_forward_extremities_persisted_bucket{instance=\"$instance\"}[$bucket_size]) and on (index, instance, job) (synapse_storage_events_persisted_events_total > 0))", "format": "time_series", "intervalFactor": 1, "legendFormat": "75%", @@ -9813,7 +9813,7 @@ "datasource": { "uid": "$datasource" }, - "expr": "histogram_quantile(0.90, rate(synapse_storage_events_forward_extremities_persisted_bucket{instance=\"$instance\"}[$bucket_size]) and on (index, instance, job) (synapse_storage_events_persisted_events > 0))", + "expr": "histogram_quantile(0.90, rate(synapse_storage_events_forward_extremities_persisted_bucket{instance=\"$instance\"}[$bucket_size]) and on (index, instance, job) (synapse_storage_events_persisted_events_total > 0))", "format": "time_series", "intervalFactor": 1, "legendFormat": "90%", @@ -9823,7 +9823,7 @@ "datasource": { "uid": "$datasource" }, - "expr": "histogram_quantile(0.99, rate(synapse_storage_events_forward_extremities_persisted_bucket{instance=\"$instance\"}[$bucket_size]) and on (index, instance, job) (synapse_storage_events_persisted_events > 0))", + "expr": "histogram_quantile(0.99, 
rate(synapse_storage_events_forward_extremities_persisted_bucket{instance=\"$instance\"}[$bucket_size]) and on (index, instance, job) (synapse_storage_events_persisted_events_total > 0))", "format": "time_series", "intervalFactor": 1, "legendFormat": "99%", @@ -9905,7 +9905,7 @@ "datasource": { "uid": "$datasource" }, - "expr": "rate(synapse_storage_events_stale_forward_extremities_persisted_bucket{instance=\"$instance\"}[$bucket_size]) and on (index, instance, job) (synapse_storage_events_persisted_events > 0)", + "expr": "rate(synapse_storage_events_stale_forward_extremities_persisted_bucket{instance=\"$instance\"}[$bucket_size]) and on (index, instance, job) (synapse_storage_events_persisted_events_total > 0)", "format": "heatmap", "intervalFactor": 1, "legendFormat": "{{le}}", @@ -9982,7 +9982,7 @@ "datasource": { "uid": "$datasource" }, - "expr": "histogram_quantile(0.5, rate(synapse_storage_events_stale_forward_extremities_persisted_bucket{instance=\"$instance\"}[$bucket_size]) and on (index, instance, job) (synapse_storage_events_persisted_events > 0))", + "expr": "histogram_quantile(0.5, rate(synapse_storage_events_stale_forward_extremities_persisted_bucket{instance=\"$instance\"}[$bucket_size]) and on (index, instance, job) (synapse_storage_events_persisted_events_total > 0))", "format": "time_series", "intervalFactor": 1, "legendFormat": "50%", @@ -9992,7 +9992,7 @@ "datasource": { "uid": "$datasource" }, - "expr": "histogram_quantile(0.75, rate(synapse_storage_events_stale_forward_extremities_persisted_bucket{instance=\"$instance\"}[$bucket_size]) and on (index, instance, job) (synapse_storage_events_persisted_events > 0))", + "expr": "histogram_quantile(0.75, rate(synapse_storage_events_stale_forward_extremities_persisted_bucket{instance=\"$instance\"}[$bucket_size]) and on (index, instance, job) (synapse_storage_events_persisted_events_total > 0))", "format": "time_series", "intervalFactor": 1, "legendFormat": "75%", @@ -10002,7 +10002,7 @@ "datasource": { "uid": "$datasource" }, - "expr": "histogram_quantile(0.90, rate(synapse_storage_events_stale_forward_extremities_persisted_bucket{instance=\"$instance\"}[$bucket_size]) and on (index, instance, job) (synapse_storage_events_persisted_events > 0))", + "expr": "histogram_quantile(0.90, rate(synapse_storage_events_stale_forward_extremities_persisted_bucket{instance=\"$instance\"}[$bucket_size]) and on (index, instance, job) (synapse_storage_events_persisted_events_total > 0))", "format": "time_series", "intervalFactor": 1, "legendFormat": "90%", @@ -10012,7 +10012,7 @@ "datasource": { "uid": "$datasource" }, - "expr": "histogram_quantile(0.99, rate(synapse_storage_events_stale_forward_extremities_persisted_bucket{instance=\"$instance\"}[$bucket_size]) and on (index, instance, job) (synapse_storage_events_persisted_events > 0))", + "expr": "histogram_quantile(0.99, rate(synapse_storage_events_stale_forward_extremities_persisted_bucket{instance=\"$instance\"}[$bucket_size]) and on (index, instance, job) (synapse_storage_events_persisted_events_total > 0))", "format": "time_series", "intervalFactor": 1, "legendFormat": "99%", @@ -10297,7 +10297,7 @@ "datasource": { "uid": "$datasource" }, - "expr": "sum(rate(synapse_storage_events_state_resolutions_during_persistence{instance=\"$instance\",job=~\"$job\",index=~\"$index\"}[$bucket_size]))", + "expr": "sum(rate(synapse_storage_events_state_resolutions_during_persistence_total{instance=\"$instance\",job=~\"$job\",index=~\"$index\"}[$bucket_size]))", "interval": "", "legendFormat": 
"State res ", "refId": "A" @@ -10306,7 +10306,7 @@ "datasource": { "uid": "$datasource" }, - "expr": "sum(rate(synapse_storage_events_potential_times_prune_extremities{instance=\"$instance\",job=~\"$job\",index=~\"$index\"}[$bucket_size]))", + "expr": "sum(rate(synapse_storage_events_potential_times_prune_extremities_total{instance=\"$instance\",job=~\"$job\",index=~\"$index\"}[$bucket_size]))", "interval": "", "legendFormat": "Potential to prune", "refId": "B" @@ -10315,7 +10315,7 @@ "datasource": { "uid": "$datasource" }, - "expr": "sum(rate(synapse_storage_events_times_pruned_extremities{instance=\"$instance\",job=~\"$job\",index=~\"$index\"}[$bucket_size]))", + "expr": "sum(rate(synapse_storage_events_times_pruned_extremities_total{instance=\"$instance\",job=~\"$job\",index=~\"$index\"}[$bucket_size]))", "interval": "", "legendFormat": "Pruned", "refId": "C" @@ -11069,7 +11069,7 @@ "datasource": { "uid": "$datasource" }, - "expr": "rate(synapse_handler_presence_notified_presence{job=\"$job\",index=~\"$index\",instance=\"$instance\"}[$bucket_size])", + "expr": "rate(synapse_handler_presence_notified_presence_total{job=\"$job\",index=~\"$index\",instance=\"$instance\"}[$bucket_size])", "interval": "", "legendFormat": "Notified", "refId": "A" @@ -11078,7 +11078,7 @@ "datasource": { "uid": "$datasource" }, - "expr": "rate(synapse_handler_presence_federation_presence_out{job=\"$job\",index=~\"$index\",instance=\"$instance\"}[$bucket_size])", + "expr": "rate(synapse_handler_presence_federation_presence_out_total{job=\"$job\",index=~\"$index\",instance=\"$instance\"}[$bucket_size])", "interval": "", "legendFormat": "Remote ping", "refId": "B" @@ -11087,7 +11087,7 @@ "datasource": { "uid": "$datasource" }, - "expr": "rate(synapse_handler_presence_presence_updates{job=\"$job\",index=~\"$index\",instance=\"$instance\"}[$bucket_size])", + "expr": "rate(synapse_handler_presence_presence_updates_total{job=\"$job\",index=~\"$index\",instance=\"$instance\"}[$bucket_size])", "interval": "", "legendFormat": "Total updates", "refId": "C" @@ -11096,7 +11096,7 @@ "datasource": { "uid": "$datasource" }, - "expr": "rate(synapse_handler_presence_federation_presence{job=\"$job\",index=~\"$index\",instance=\"$instance\"}[$bucket_size])", + "expr": "rate(synapse_handler_presence_federation_presence_total{job=\"$job\",index=~\"$index\",instance=\"$instance\"}[$bucket_size])", "interval": "", "legendFormat": "Remote updates", "refId": "D" @@ -11105,7 +11105,7 @@ "datasource": { "uid": "$datasource" }, - "expr": "rate(synapse_handler_presence_bump_active_time{job=\"$job\",index=~\"$index\",instance=\"$instance\"}[$bucket_size])", + "expr": "rate(synapse_handler_presence_bump_active_time_total{job=\"$job\",index=~\"$index\",instance=\"$instance\"}[$bucket_size])", "interval": "", "legendFormat": "Bump active time", "refId": "E" @@ -11789,7 +11789,7 @@ "name": "instance", "options": [], "query": { - "query": "label_values(synapse_util_metrics_block_ru_utime_seconds, instance)", + "query": "label_values(synapse_util_metrics_block_ru_utime_seconds_total, instance)", "refId": "Prometheus-instance-Variable-Query" }, "refresh": 2, @@ -11818,7 +11818,7 @@ "name": "job", "options": [], "query": { - "query": "label_values(synapse_util_metrics_block_ru_utime_seconds, job)", + "query": "label_values(synapse_util_metrics_block_ru_utime_seconds_total, job)", "refId": "Prometheus-job-Variable-Query" }, "refresh": 2, @@ -11848,7 +11848,7 @@ "name": "index", "options": [], "query": { - "query": 
"label_values(synapse_util_metrics_block_ru_utime_seconds, index)", + "query": "label_values(synapse_util_metrics_block_ru_utime_seconds_total, index)", "refId": "Prometheus-index-Variable-Query" }, "refresh": 2, @@ -11896,6 +11896,6 @@ "timezone": "", "title": "Synapse", "uid": "000000012", - "version": 132, + "version": 133, "weekStart": "" -} \ No newline at end of file +} diff --git a/synapse/metrics/_legacy_exposition.py b/synapse/metrics/_legacy_exposition.py index ff640a49af..6f00ff2a47 100644 --- a/synapse/metrics/_legacy_exposition.py +++ b/synapse/metrics/_legacy_exposition.py @@ -88,11 +88,11 @@ LEGACY_METRIC_NAMES = { "synapse_util_caches_cache_hits": "synapse_util_caches_cache:hits", "synapse_util_caches_cache_size": "synapse_util_caches_cache:size", "synapse_util_caches_cache_evicted_size": "synapse_util_caches_cache:evicted_size", - "synapse_util_caches_cache_total": "synapse_util_caches_cache:total", + "synapse_util_caches_cache": "synapse_util_caches_cache:total", "synapse_util_caches_response_cache_size": "synapse_util_caches_response_cache:size", "synapse_util_caches_response_cache_hits": "synapse_util_caches_response_cache:hits", "synapse_util_caches_response_cache_evicted_size": "synapse_util_caches_response_cache:evicted_size", - "synapse_util_caches_response_cache_total": "synapse_util_caches_response_cache:total", + "synapse_util_caches_response_cache": "synapse_util_caches_response_cache:total", } diff --git a/synapse/util/caches/__init__.py b/synapse/util/caches/__init__.py index bdf9b0dc8c..d4a2b77c29 100644 --- a/synapse/util/caches/__init__.py +++ b/synapse/util/caches/__init__.py @@ -37,7 +37,7 @@ collectors_by_name: Dict[str, "CacheMetric"] = {} cache_size = Gauge("synapse_util_caches_cache_size", "", ["name"]) cache_hits = Gauge("synapse_util_caches_cache_hits", "", ["name"]) cache_evicted = Gauge("synapse_util_caches_cache_evicted_size", "", ["name", "reason"]) -cache_total = Gauge("synapse_util_caches_cache_total", "", ["name"]) +cache_total = Gauge("synapse_util_caches_cache", "", ["name"]) cache_max_size = Gauge("synapse_util_caches_cache_max_size", "", ["name"]) cache_memory_usage = Gauge( "synapse_util_caches_cache_size_bytes", @@ -50,7 +50,7 @@ response_cache_hits = Gauge("synapse_util_caches_response_cache_hits", "", ["nam response_cache_evicted = Gauge( "synapse_util_caches_response_cache_evicted_size", "", ["name", "reason"] ) -response_cache_total = Gauge("synapse_util_caches_response_cache_total", "", ["name"]) +response_cache_total = Gauge("synapse_util_caches_response_cache", "", ["name"]) class EvictionReason(Enum): -- cgit 1.5.1 From a4ecb8e35309d780f5d4e93fb4998b90c9068e8a Mon Sep 17 00:00:00 2001 From: David Robertson Date: Tue, 6 Sep 2022 14:29:16 +0100 Subject: Actually fix typechecking with latest types-jsonschema (#13724) --- changelog.d/13724.misc | 1 + synapse/api/filtering.py | 8 ++++---- 2 files changed, 5 insertions(+), 4 deletions(-) create mode 100644 changelog.d/13724.misc (limited to 'synapse') diff --git a/changelog.d/13724.misc b/changelog.d/13724.misc new file mode 100644 index 0000000000..2c4f6b19f6 --- /dev/null +++ b/changelog.d/13724.misc @@ -0,0 +1 @@ +Fix typechecking with latest types-jsonschema. 
diff --git a/synapse/api/filtering.py b/synapse/api/filtering.py index 102889ac49..f7f46f8d80 100644 --- a/synapse/api/filtering.py +++ b/synapse/api/filtering.py @@ -140,13 +140,13 @@ USER_FILTER_SCHEMA = { @FormatChecker.cls_checks("matrix_room_id") -def matrix_room_id_validator(room_id_str: str) -> bool: - return RoomID.is_valid(room_id_str) +def matrix_room_id_validator(room_id: object) -> bool: + return isinstance(room_id, str) and RoomID.is_valid(room_id) @FormatChecker.cls_checks("matrix_user_id") -def matrix_user_id_validator(user_id_str: str) -> bool: - return UserID.is_valid(user_id_str) +def matrix_user_id_validator(user_id: object) -> bool: + return isinstance(user_id, str) and UserID.is_valid(user_id) class Filtering: -- cgit 1.5.1 From 26bc26586b4b95d63ce7e453e9312469843f796e Mon Sep 17 00:00:00 2001 From: reivilibre Date: Tue, 6 Sep 2022 19:28:44 +0000 Subject: Remove the unspecced room_id field in the /hierarchy response. (#13506) This is a re-do of 57d334a13d983406ea452dfa203bbe4837509c4e (#13365), which was backed out in 12abd724974a2311d5311272d26d2f8aa11734a9 (#13501). The `room_id` field represented the parent space for each room and was made redundant by changes in the API shape where the `children_state` is now nested underneath each `room`. The room ID of each child is in the `state_key` field and is still available. --- changelog.d/13506.bugfix | 1 + synapse/handlers/room_summary.py | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) create mode 100644 changelog.d/13506.bugfix (limited to 'synapse') diff --git a/changelog.d/13506.bugfix b/changelog.d/13506.bugfix new file mode 100644 index 0000000000..2e43668865 --- /dev/null +++ b/changelog.d/13506.bugfix @@ -0,0 +1 @@ +Fix a bug introduced in Synapse v1.41.0 where the `/hierarchy` API returned non-standard information (a `room_id` field under each entry in `children_state`). \ No newline at end of file diff --git a/synapse/handlers/room_summary.py b/synapse/handlers/room_summary.py index 732b0310bc..ebd445adca 100644 --- a/synapse/handlers/room_summary.py +++ b/synapse/handlers/room_summary.py @@ -453,7 +453,6 @@ class RoomSummaryHandler: "type": e.type, "state_key": e.state_key, "content": e.content, - "room_id": e.room_id, "sender": e.sender, "origin_server_ts": e.origin_server_ts, } -- cgit 1.5.1 From bb5b47b62a11b14a3458e5a8aafd9ddaf1294199 Mon Sep 17 00:00:00 2001 From: Connor Davis Date: Wed, 7 Sep 2022 05:54:44 -0400 Subject: Add Admin API to Fetch Messages Within a Particular Window (#13672) This adds two new admin APIs that allow us to fetch messages from a room within a particular time. --- changelog.d/13672.feature | 1 + docs/admin_api/rooms.md | 145 +++++++++++++++++++++++++++++++++++++ synapse/handlers/pagination.py | 37 ++++++---- synapse/rest/admin/__init__.py | 4 ++ synapse/rest/admin/rooms.py | 104 +++++++++++++++++++++++++++ tests/rest/admin/test_room.py | 158 ++++++++++++++++++++++++++++++++++++++++- 6 files changed, 435 insertions(+), 14 deletions(-) create mode 100644 changelog.d/13672.feature (limited to 'synapse') diff --git a/changelog.d/13672.feature b/changelog.d/13672.feature new file mode 100644 index 0000000000..2334e6fe15 --- /dev/null +++ b/changelog.d/13672.feature @@ -0,0 +1 @@ +Add admin APIs to fetch messages within a particular window of time. 
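Before the hunks below, a quick illustration of how a server admin might call the new messages endpoint once deployed. This is a hypothetical sketch using the third-party `requests` library; the homeserver URL, access token and pagination token are placeholders rather than values taken from the patch (the room ID is the example ID used in the documentation below):

```python
import requests

BASE_URL = "https://homeserver.example.com"   # placeholder homeserver URL
ROOM_ID = "!636q39766251:example.com"         # example room ID from the docs
ADMIN_TOKEN = "<admin access token>"          # placeholder admin token

# Fetch the five most recent events in the room, paginating backwards.
resp = requests.get(
    f"{BASE_URL}/_synapse/admin/v1/rooms/{ROOM_ID}/messages",
    params={"from": "s0_0_0_0_0_0_0_0_0", "dir": "b", "limit": 5},
    headers={"Authorization": f"Bearer {ADMIN_TOKEN}"},
)
resp.raise_for_status()
for event in resp.json().get("chunk", []):
    print(event["type"], event["event_id"])
```

The `from` token above has the same shape as the stream token used in the tests further down; in practice it would come from `/sync` or from the `end` field of a previous response.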
diff --git a/docs/admin_api/rooms.md b/docs/admin_api/rooms.md index 7526956bec..8f727b363e 100644 --- a/docs/admin_api/rooms.md +++ b/docs/admin_api/rooms.md @@ -393,6 +393,151 @@ A response body like the following is returned: } ``` +# Room Messages API + +The Room Messages admin API allows server admins to get all messages +sent to a room in a given timeframe. There are various parameters available +that allow for filtering and ordering the returned list. This API supports pagination. + +To use it, you will need to authenticate by providing an `access_token` +for a server admin: see [Admin API](../usage/administration/admin_api). + +This endpoint mirrors the [Matrix Spec defined Messages API](https://spec.matrix.org/v1.1/client-server-api/#get_matrixclientv3roomsroomidmessages). + +The API is: +``` +GET /_synapse/admin/v1/rooms//messages +``` + +**Parameters** + +The following path parameters are required: + +* `room_id` - The ID of the room you wish to fetch messages from. + +The following query parameters are available: + +* `from` (required) - The token to start returning events from. This token can be obtained from a prev_batch + or next_batch token returned by the /sync endpoint, or from an end token returned by a previous request to this endpoint. +* `to` - The token to stop returning events at. +* `limit` - The maximum number of events to return. Defaults to `10`. +* `filter` - A JSON RoomEventFilter to filter returned events with. +* `dir` - The direction to return events from. Either `f` for forwards or `b` for backwards. Setting + this value to `b` will reverse the above sort order. Defaults to `f`. + +**Response** + +The following fields are possible in the JSON response body: + +* `chunk` - A list of room events. The order depends on the dir parameter. + Note that an empty chunk does not necessarily imply that no more events are available. Clients should continue to paginate until no end property is returned. +* `end` - A token corresponding to the end of chunk. This token can be passed back to this endpoint to request further events. + If no further events are available, this property is omitted from the response. +* `start` - A token corresponding to the start of chunk. +* `state` - A list of state events relevant to showing the chunk. + +**Example** + +For more details on each chunk, read [the Matrix specification](https://spec.matrix.org/v1.1/client-server-api/#get_matrixclientv3roomsroomidmessages). 
+ +```json +{ + "chunk": [ + { + "content": { + "body": "This is an example text message", + "format": "org.matrix.custom.html", + "formatted_body": "This is an example text message", + "msgtype": "m.text" + }, + "event_id": "$143273582443PhrSn:example.org", + "origin_server_ts": 1432735824653, + "room_id": "!636q39766251:example.com", + "sender": "@example:example.org", + "type": "m.room.message", + "unsigned": { + "age": 1234 + } + }, + { + "content": { + "name": "The room name" + }, + "event_id": "$143273582443PhrSn:example.org", + "origin_server_ts": 1432735824653, + "room_id": "!636q39766251:example.com", + "sender": "@example:example.org", + "state_key": "", + "type": "m.room.name", + "unsigned": { + "age": 1234 + } + }, + { + "content": { + "body": "Gangnam Style", + "info": { + "duration": 2140786, + "h": 320, + "mimetype": "video/mp4", + "size": 1563685, + "thumbnail_info": { + "h": 300, + "mimetype": "image/jpeg", + "size": 46144, + "w": 300 + }, + "thumbnail_url": "mxc://example.org/FHyPlCeYUSFFxlgbQYZmoEoe", + "w": 480 + }, + "msgtype": "m.video", + "url": "mxc://example.org/a526eYUSFFxlgbQYZmo442" + }, + "event_id": "$143273582443PhrSn:example.org", + "origin_server_ts": 1432735824653, + "room_id": "!636q39766251:example.com", + "sender": "@example:example.org", + "type": "m.room.message", + "unsigned": { + "age": 1234 + } + } + ], + "end": "t47409-4357353_219380_26003_2265", + "start": "t47429-4392820_219380_26003_2265" +} +``` + +# Room Timestamp to Event API + +The Room Timestamp to Event API endpoint fetches the `event_id` of the closest event to the given +timestamp (`ts` query parameter) in the given direction (`dir` query parameter). + +Useful for cases like jump to date so you can start paginating messages from +a given date in the archive. + +The API is: +``` + GET /_synapse/admin/v1/rooms//timestamp_to_event +``` + +**Parameters** + +The following path parameters are required: + +* `room_id` - The ID of the room you wish to check. + +The following query parameters are available: + +* `ts` - a timestamp in milliseconds where we will find the closest event in + the given direction. +* `dir` - can be `f` or `b` to indicate forwards and backwards in time from the + given timestamp. Defaults to `f`. + +**Response** + +* `event_id` - converted from timestamp + # Block Room API The Block Room admin API allows server admins to block and unblock rooms, and query to see if a given room is blocked. diff --git a/synapse/handlers/pagination.py b/synapse/handlers/pagination.py index a0c39778ab..1f83bab836 100644 --- a/synapse/handlers/pagination.py +++ b/synapse/handlers/pagination.py @@ -26,6 +26,7 @@ from synapse.events.utils import SerializeEventConfig from synapse.handlers.room import ShutdownRoomResponse from synapse.logging.opentracing import trace from synapse.metrics.background_process_metrics import run_as_background_process +from synapse.rest.admin._base import assert_user_is_admin from synapse.storage.state import StateFilter from synapse.streams.config import PaginationConfig from synapse.types import JsonDict, Requester, StreamKeyType @@ -423,6 +424,7 @@ class PaginationHandler: pagin_config: PaginationConfig, as_client_event: bool = True, event_filter: Optional[Filter] = None, + use_admin_priviledge: bool = False, ) -> JsonDict: """Get messages in a room. @@ -432,10 +434,16 @@ class PaginationHandler: pagin_config: The pagination config rules to apply, if any. as_client_event: True to get events in client-server format. 
event_filter: Filter to apply to results or None + use_admin_priviledge: if `True`, return all events, regardless + of whether `user` has access to them. To be used **ONLY** + from the admin API. Returns: Pagination API results """ + if use_admin_priviledge: + await assert_user_is_admin(self.auth, requester) + user_id = requester.user.to_string() if pagin_config.from_token: @@ -458,12 +466,14 @@ class PaginationHandler: room_token = from_token.room_key async with self.pagination_lock.read(room_id): - ( - membership, - member_event_id, - ) = await self.auth.check_user_in_room_or_world_readable( - room_id, requester, allow_departed_users=True - ) + (membership, member_event_id) = (None, None) + if not use_admin_priviledge: + ( + membership, + member_event_id, + ) = await self.auth.check_user_in_room_or_world_readable( + room_id, requester, allow_departed_users=True + ) if pagin_config.direction == "b": # if we're going backwards, we might need to backfill. This @@ -475,7 +485,7 @@ class PaginationHandler: room_id, room_token.stream ) - if membership == Membership.LEAVE: + if not use_admin_priviledge and membership == Membership.LEAVE: # If they have left the room then clamp the token to be before # they left the room, to save the effort of loading from the # database. @@ -528,12 +538,13 @@ class PaginationHandler: if event_filter: events = await event_filter.filter(events) - events = await filter_events_for_client( - self._storage_controllers, - user_id, - events, - is_peeking=(member_event_id is None), - ) + if not use_admin_priviledge: + events = await filter_events_for_client( + self._storage_controllers, + user_id, + events, + is_peeking=(member_event_id is None), + ) # if after the filter applied there are no more events # return immediately - but there might be more in next_token batch diff --git a/synapse/rest/admin/__init__.py b/synapse/rest/admin/__init__.py index fa3266720b..bac754e1b1 100644 --- a/synapse/rest/admin/__init__.py +++ b/synapse/rest/admin/__init__.py @@ -61,9 +61,11 @@ from synapse.rest.admin.rooms import ( MakeRoomAdminRestServlet, RoomEventContextServlet, RoomMembersRestServlet, + RoomMessagesRestServlet, RoomRestServlet, RoomRestV2Servlet, RoomStateRestServlet, + RoomTimestampToEventRestServlet, ) from synapse.rest.admin.server_notice_servlet import SendServerNoticeServlet from synapse.rest.admin.statistics import UserMediaStatisticsRestServlet @@ -271,6 +273,8 @@ def register_servlets(hs: "HomeServer", http_server: HttpServer) -> None: DestinationResetConnectionRestServlet(hs).register(http_server) DestinationRestServlet(hs).register(http_server) ListDestinationsRestServlet(hs).register(http_server) + RoomMessagesRestServlet(hs).register(http_server) + RoomTimestampToEventRestServlet(hs).register(http_server) # Some servlets only get registered for the main process. 
if hs.config.worker.worker_app is None: diff --git a/synapse/rest/admin/rooms.py b/synapse/rest/admin/rooms.py index 3d870629c4..747e6fda83 100644 --- a/synapse/rest/admin/rooms.py +++ b/synapse/rest/admin/rooms.py @@ -35,6 +35,7 @@ from synapse.rest.admin._base import ( ) from synapse.storage.databases.main.room import RoomSortOrder from synapse.storage.state import StateFilter +from synapse.streams.config import PaginationConfig from synapse.types import JsonDict, RoomID, UserID, create_requester from synapse.util import json_decoder @@ -858,3 +859,106 @@ class BlockRoomRestServlet(RestServlet): await self._store.unblock_room(room_id) return HTTPStatus.OK, {"block": block} + + +class RoomMessagesRestServlet(RestServlet): + """ + Get messages list of a room. + """ + + PATTERNS = admin_patterns("/rooms/(?P[^/]*)/messages$") + + def __init__(self, hs: "HomeServer"): + self._hs = hs + self._clock = hs.get_clock() + self._pagination_handler = hs.get_pagination_handler() + self._auth = hs.get_auth() + self._store = hs.get_datastores().main + + async def on_GET( + self, request: SynapseRequest, room_id: str + ) -> Tuple[int, JsonDict]: + requester = await self._auth.get_user_by_req(request) + await assert_user_is_admin(self._auth, requester) + + pagination_config = await PaginationConfig.from_request( + self._store, request, default_limit=10 + ) + # Twisted will have processed the args by now. + assert request.args is not None + as_client_event = b"raw" not in request.args + filter_str = parse_string(request, "filter", encoding="utf-8") + if filter_str: + filter_json = urlparse.unquote(filter_str) + event_filter: Optional[Filter] = Filter( + self._hs, json_decoder.decode(filter_json) + ) + if ( + event_filter + and event_filter.filter_json.get("event_format", "client") + == "federation" + ): + as_client_event = False + else: + event_filter = None + + msgs = await self._pagination_handler.get_messages( + room_id=room_id, + requester=requester, + pagin_config=pagination_config, + as_client_event=as_client_event, + event_filter=event_filter, + use_admin_priviledge=True, + ) + + return HTTPStatus.OK, msgs + + +class RoomTimestampToEventRestServlet(RestServlet): + """ + API endpoint to fetch the `event_id` of the closest event to the given + timestamp (`ts` query parameter) in the given direction (`dir` query + parameter). + + Useful for cases like jump to date so you can start paginating messages from + a given date in the archive. + + `ts` is a timestamp in milliseconds where we will find the closest event in + the given direction. + + `dir` can be `f` or `b` to indicate forwards and backwards in time from the + given timestamp. + + GET /_synapse/admin/v1/rooms//timestamp_to_event?ts=&dir= + { + "event_id": ... 
+ } + """ + + PATTERNS = admin_patterns("/rooms/(?P[^/]*)/timestamp_to_event$") + + def __init__(self, hs: "HomeServer"): + self._auth = hs.get_auth() + self._store = hs.get_datastores().main + self._timestamp_lookup_handler = hs.get_timestamp_lookup_handler() + + async def on_GET( + self, request: SynapseRequest, room_id: str + ) -> Tuple[int, JsonDict]: + requester = await self._auth.get_user_by_req(request) + await assert_user_is_admin(self._auth, requester) + + timestamp = parse_integer(request, "ts", required=True) + direction = parse_string(request, "dir", default="f", allowed_values=["f", "b"]) + + ( + event_id, + origin_server_ts, + ) = await self._timestamp_lookup_handler.get_event_for_timestamp( + requester, room_id, timestamp, direction + ) + + return HTTPStatus.OK, { + "event_id": event_id, + "origin_server_ts": origin_server_ts, + } diff --git a/tests/rest/admin/test_room.py b/tests/rest/admin/test_room.py index 9d71a97524..d156be82b0 100644 --- a/tests/rest/admin/test_room.py +++ b/tests/rest/admin/test_room.py @@ -11,6 +11,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +import json +import time import urllib.parse from typing import List, Optional from unittest.mock import Mock @@ -22,10 +24,11 @@ from twisted.test.proto_helpers import MemoryReactor import synapse.rest.admin from synapse.api.constants import EventTypes, Membership, RoomTypes from synapse.api.errors import Codes -from synapse.handlers.pagination import PaginationHandler +from synapse.handlers.pagination import PaginationHandler, PurgeStatus from synapse.rest.client import directory, events, login, room from synapse.server import HomeServer from synapse.util import Clock +from synapse.util.stringutils import random_string from tests import unittest @@ -1793,6 +1796,159 @@ class RoomTestCase(unittest.HomeserverTestCase): self.assertEqual(Codes.FORBIDDEN, channel.json_body["errcode"]) +class RoomMessagesTestCase(unittest.HomeserverTestCase): + servlets = [ + synapse.rest.admin.register_servlets, + login.register_servlets, + room.register_servlets, + ] + + def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None: + self.admin_user = self.register_user("admin", "pass", admin=True) + self.admin_user_tok = self.login("admin", "pass") + + self.user = self.register_user("foo", "pass") + self.user_tok = self.login("foo", "pass") + self.room_id = self.helper.create_room_as(self.user, tok=self.user_tok) + + def test_timestamp_to_event(self) -> None: + """Test that providing the current timestamp can get the last event.""" + self.helper.send(self.room_id, body="message 1", tok=self.user_tok) + second_event_id = self.helper.send( + self.room_id, body="message 2", tok=self.user_tok + )["event_id"] + ts = str(round(time.time() * 1000)) + + channel = self.make_request( + "GET", + "/_synapse/admin/v1/rooms/%s/timestamp_to_event?dir=b&ts=%s" + % (self.room_id, ts), + access_token=self.admin_user_tok, + ) + self.assertEqual(200, channel.code) + self.assertIn("event_id", channel.json_body) + self.assertEqual(second_event_id, channel.json_body["event_id"]) + + def test_topo_token_is_accepted(self) -> None: + """Test Topo Token is accepted.""" + token = "t1-0_0_0_0_0_0_0_0_0" + channel = self.make_request( + "GET", + "/_synapse/admin/v1/rooms/%s/messages?from=%s" % (self.room_id, token), + access_token=self.admin_user_tok, + ) + self.assertEqual(200, channel.code) + 
self.assertIn("start", channel.json_body) + self.assertEqual(token, channel.json_body["start"]) + self.assertIn("chunk", channel.json_body) + self.assertIn("end", channel.json_body) + + def test_stream_token_is_accepted_for_fwd_pagianation(self) -> None: + """Test that stream token is accepted for forward pagination.""" + token = "s0_0_0_0_0_0_0_0_0" + channel = self.make_request( + "GET", + "/_synapse/admin/v1/rooms/%s/messages?from=%s" % (self.room_id, token), + access_token=self.admin_user_tok, + ) + self.assertEqual(200, channel.code) + self.assertIn("start", channel.json_body) + self.assertEqual(token, channel.json_body["start"]) + self.assertIn("chunk", channel.json_body) + self.assertIn("end", channel.json_body) + + def test_room_messages_purge(self) -> None: + """Test room messages can be retrieved by an admin that isn't in the room.""" + store = self.hs.get_datastores().main + pagination_handler = self.hs.get_pagination_handler() + + # Send a first message in the room, which will be removed by the purge. + first_event_id = self.helper.send( + self.room_id, body="message 1", tok=self.user_tok + )["event_id"] + first_token = self.get_success( + store.get_topological_token_for_event(first_event_id) + ) + first_token_str = self.get_success(first_token.to_string(store)) + + # Send a second message in the room, which won't be removed, and which we'll + # use as the marker to purge events before. + second_event_id = self.helper.send( + self.room_id, body="message 2", tok=self.user_tok + )["event_id"] + second_token = self.get_success( + store.get_topological_token_for_event(second_event_id) + ) + second_token_str = self.get_success(second_token.to_string(store)) + + # Send a third event in the room to ensure we don't fall under any edge case + # due to our marker being the latest forward extremity in the room. + self.helper.send(self.room_id, body="message 3", tok=self.user_tok) + + # Check that we get the first and second message when querying /messages. + channel = self.make_request( + "GET", + "/_synapse/admin/v1/rooms/%s/messages?from=%s&dir=b&filter=%s" + % ( + self.room_id, + second_token_str, + json.dumps({"types": [EventTypes.Message]}), + ), + access_token=self.admin_user_tok, + ) + self.assertEqual(channel.code, 200, channel.json_body) + + chunk = channel.json_body["chunk"] + self.assertEqual(len(chunk), 2, [event["content"] for event in chunk]) + + # Purge every event before the second event. + purge_id = random_string(16) + pagination_handler._purges_by_id[purge_id] = PurgeStatus() + self.get_success( + pagination_handler._purge_history( + purge_id=purge_id, + room_id=self.room_id, + token=second_token_str, + delete_local_events=True, + ) + ) + + # Check that we only get the second message through /message now that the first + # has been purged. + channel = self.make_request( + "GET", + "/_synapse/admin/v1/rooms/%s/messages?from=%s&dir=b&filter=%s" + % ( + self.room_id, + second_token_str, + json.dumps({"types": [EventTypes.Message]}), + ), + access_token=self.admin_user_tok, + ) + self.assertEqual(channel.code, 200, channel.json_body) + + chunk = channel.json_body["chunk"] + self.assertEqual(len(chunk), 1, [event["content"] for event in chunk]) + + # Check that we get no event, but also no error, when querying /messages with + # the token that was pointing at the first event, because we don't have it + # anymore. 
+ channel = self.make_request( + "GET", + "/_synapse/admin/v1/rooms/%s/messages?from=%s&dir=b&filter=%s" + % ( + self.room_id, + first_token_str, + json.dumps({"types": [EventTypes.Message]}), + ), + access_token=self.admin_user_tok, + ) + self.assertEqual(channel.code, 200, channel.json_body) + + chunk = channel.json_body["chunk"] + self.assertEqual(len(chunk), 0, [event["content"] for event in chunk]) + + class JoinAliasRoomTestCase(unittest.HomeserverTestCase): servlets = [ -- cgit 1.5.1 From c2fe48a6ffb99f553f3eaecb8f15bcbedb58add0 Mon Sep 17 00:00:00 2001 From: reivilibre Date: Wed, 7 Sep 2022 10:08:20 +0000 Subject: Rename the `EventFormatVersions` enum values so that they line up with room version numbers. (#13706) --- changelog.d/13706.misc | 1 + synapse/api/room_versions.py | 45 ++++++++++++---------- synapse/event_auth.py | 4 +- synapse/events/__init__.py | 12 +++--- synapse/events/builder.py | 4 +- synapse/events/validator.py | 2 +- synapse/federation/federation_base.py | 2 +- synapse/federation/federation_client.py | 2 +- synapse/storage/databases/main/event_federation.py | 2 +- synapse/storage/databases/main/events_worker.py | 6 +-- tests/storage/databases/main/test_events_worker.py | 2 +- tests/storage/test_event_federation.py | 2 +- tests/test_event_auth.py | 4 +- 13 files changed, 47 insertions(+), 41 deletions(-) create mode 100644 changelog.d/13706.misc (limited to 'synapse') diff --git a/changelog.d/13706.misc b/changelog.d/13706.misc new file mode 100644 index 0000000000..65c854c7a9 --- /dev/null +++ b/changelog.d/13706.misc @@ -0,0 +1 @@ +Rename the `EventFormatVersions` enum values so that they line up with room version numbers. \ No newline at end of file diff --git a/synapse/api/room_versions.py b/synapse/api/room_versions.py index a0e4ab6db6..e37acb0f1e 100644 --- a/synapse/api/room_versions.py +++ b/synapse/api/room_versions.py @@ -19,18 +19,23 @@ import attr class EventFormatVersions: """This is an internal enum for tracking the version of the event format, - independently from the room version. + independently of the room version. + + To reduce confusion, the event format versions are named after the room + versions that they were used or introduced in. + The concept of an 'event format version' is specific to Synapse (the + specification does not mention this term.) 
""" - V1 = 1 # $id:server event id format - V2 = 2 # MSC1659-style $hash event id format: introduced for room v3 - V3 = 3 # MSC1884-style $hash format: introduced for room v4 + ROOM_V1_V2 = 1 # $id:server event id format: used for room v1 and v2 + ROOM_V3 = 2 # MSC1659-style $hash event id format: used for room v3 + ROOM_V4_PLUS = 3 # MSC1884-style $hash format: introduced for room v4 KNOWN_EVENT_FORMAT_VERSIONS = { - EventFormatVersions.V1, - EventFormatVersions.V2, - EventFormatVersions.V3, + EventFormatVersions.ROOM_V1_V2, + EventFormatVersions.ROOM_V3, + EventFormatVersions.ROOM_V4_PLUS, } @@ -92,7 +97,7 @@ class RoomVersions: V1 = RoomVersion( "1", RoomDisposition.STABLE, - EventFormatVersions.V1, + EventFormatVersions.ROOM_V1_V2, StateResolutionVersions.V1, enforce_key_validity=False, special_case_aliases_auth=True, @@ -110,7 +115,7 @@ class RoomVersions: V2 = RoomVersion( "2", RoomDisposition.STABLE, - EventFormatVersions.V1, + EventFormatVersions.ROOM_V1_V2, StateResolutionVersions.V2, enforce_key_validity=False, special_case_aliases_auth=True, @@ -128,7 +133,7 @@ class RoomVersions: V3 = RoomVersion( "3", RoomDisposition.STABLE, - EventFormatVersions.V2, + EventFormatVersions.ROOM_V3, StateResolutionVersions.V2, enforce_key_validity=False, special_case_aliases_auth=True, @@ -146,7 +151,7 @@ class RoomVersions: V4 = RoomVersion( "4", RoomDisposition.STABLE, - EventFormatVersions.V3, + EventFormatVersions.ROOM_V4_PLUS, StateResolutionVersions.V2, enforce_key_validity=False, special_case_aliases_auth=True, @@ -164,7 +169,7 @@ class RoomVersions: V5 = RoomVersion( "5", RoomDisposition.STABLE, - EventFormatVersions.V3, + EventFormatVersions.ROOM_V4_PLUS, StateResolutionVersions.V2, enforce_key_validity=True, special_case_aliases_auth=True, @@ -182,7 +187,7 @@ class RoomVersions: V6 = RoomVersion( "6", RoomDisposition.STABLE, - EventFormatVersions.V3, + EventFormatVersions.ROOM_V4_PLUS, StateResolutionVersions.V2, enforce_key_validity=True, special_case_aliases_auth=False, @@ -200,7 +205,7 @@ class RoomVersions: MSC2176 = RoomVersion( "org.matrix.msc2176", RoomDisposition.UNSTABLE, - EventFormatVersions.V3, + EventFormatVersions.ROOM_V4_PLUS, StateResolutionVersions.V2, enforce_key_validity=True, special_case_aliases_auth=False, @@ -218,7 +223,7 @@ class RoomVersions: V7 = RoomVersion( "7", RoomDisposition.STABLE, - EventFormatVersions.V3, + EventFormatVersions.ROOM_V4_PLUS, StateResolutionVersions.V2, enforce_key_validity=True, special_case_aliases_auth=False, @@ -236,7 +241,7 @@ class RoomVersions: V8 = RoomVersion( "8", RoomDisposition.STABLE, - EventFormatVersions.V3, + EventFormatVersions.ROOM_V4_PLUS, StateResolutionVersions.V2, enforce_key_validity=True, special_case_aliases_auth=False, @@ -254,7 +259,7 @@ class RoomVersions: V9 = RoomVersion( "9", RoomDisposition.STABLE, - EventFormatVersions.V3, + EventFormatVersions.ROOM_V4_PLUS, StateResolutionVersions.V2, enforce_key_validity=True, special_case_aliases_auth=False, @@ -272,7 +277,7 @@ class RoomVersions: MSC3787 = RoomVersion( "org.matrix.msc3787", RoomDisposition.UNSTABLE, - EventFormatVersions.V3, + EventFormatVersions.ROOM_V4_PLUS, StateResolutionVersions.V2, enforce_key_validity=True, special_case_aliases_auth=False, @@ -290,7 +295,7 @@ class RoomVersions: V10 = RoomVersion( "10", RoomDisposition.STABLE, - EventFormatVersions.V3, + EventFormatVersions.ROOM_V4_PLUS, StateResolutionVersions.V2, enforce_key_validity=True, special_case_aliases_auth=False, @@ -308,7 +313,7 @@ class RoomVersions: MSC2716v4 = RoomVersion( 
"org.matrix.msc2716v4", RoomDisposition.UNSTABLE, - EventFormatVersions.V3, + EventFormatVersions.ROOM_V4_PLUS, StateResolutionVersions.V2, enforce_key_validity=True, special_case_aliases_auth=False, diff --git a/synapse/event_auth.py b/synapse/event_auth.py index 389b0c5d53..c7d5ef92fc 100644 --- a/synapse/event_auth.py +++ b/synapse/event_auth.py @@ -109,7 +109,7 @@ def validate_event_for_room_version(event: "EventBase") -> None: if not is_invite_via_3pid: raise AuthError(403, "Event not signed by sender's server") - if event.format_version in (EventFormatVersions.V1,): + if event.format_version in (EventFormatVersions.ROOM_V1_V2,): # Only older room versions have event IDs to check. event_id_domain = get_domain_from_id(event.event_id) @@ -716,7 +716,7 @@ def check_redaction( if user_level >= redact_level: return False - if room_version_obj.event_format == EventFormatVersions.V1: + if room_version_obj.event_format == EventFormatVersions.ROOM_V1_V2: redacter_domain = get_domain_from_id(event.event_id) if not isinstance(event.redacts, str): return False diff --git a/synapse/events/__init__.py b/synapse/events/__init__.py index 39ad2793d9..b2c9119fd0 100644 --- a/synapse/events/__init__.py +++ b/synapse/events/__init__.py @@ -442,7 +442,7 @@ class EventBase(metaclass=abc.ABCMeta): class FrozenEvent(EventBase): - format_version = EventFormatVersions.V1 # All events of this type are V1 + format_version = EventFormatVersions.ROOM_V1_V2 # All events of this type are V1 def __init__( self, @@ -490,7 +490,7 @@ class FrozenEvent(EventBase): class FrozenEventV2(EventBase): - format_version = EventFormatVersions.V2 # All events of this type are V2 + format_version = EventFormatVersions.ROOM_V3 # All events of this type are V2 def __init__( self, @@ -567,7 +567,7 @@ class FrozenEventV2(EventBase): class FrozenEventV3(FrozenEventV2): """FrozenEventV3, which differs from FrozenEventV2 only in the event_id format""" - format_version = EventFormatVersions.V3 # All events of this type are V3 + format_version = EventFormatVersions.ROOM_V4_PLUS # All events of this type are V3 @property def event_id(self) -> str: @@ -597,11 +597,11 @@ def _event_type_from_format_version( `FrozenEvent` """ - if format_version == EventFormatVersions.V1: + if format_version == EventFormatVersions.ROOM_V1_V2: return FrozenEvent - elif format_version == EventFormatVersions.V2: + elif format_version == EventFormatVersions.ROOM_V3: return FrozenEventV2 - elif format_version == EventFormatVersions.V3: + elif format_version == EventFormatVersions.ROOM_V4_PLUS: return FrozenEventV3 else: raise Exception("No event format %r" % (format_version,)) diff --git a/synapse/events/builder.py b/synapse/events/builder.py index 17f624b68f..746bd3978d 100644 --- a/synapse/events/builder.py +++ b/synapse/events/builder.py @@ -137,7 +137,7 @@ class EventBuilder: # The types of auth/prev events changes between event versions. 
prev_events: Union[List[str], List[Tuple[str, Dict[str, str]]]] auth_events: Union[List[str], List[Tuple[str, Dict[str, str]]]] - if format_version == EventFormatVersions.V1: + if format_version == EventFormatVersions.ROOM_V1_V2: auth_events = await self._store.add_event_hashes(auth_event_ids) prev_events = await self._store.add_event_hashes(prev_event_ids) else: @@ -253,7 +253,7 @@ def create_local_event_from_event_dict( time_now = int(clock.time_msec()) - if format_version == EventFormatVersions.V1: + if format_version == EventFormatVersions.ROOM_V1_V2: event_dict["event_id"] = _create_event_id(clock, hostname) event_dict["origin"] = hostname diff --git a/synapse/events/validator.py b/synapse/events/validator.py index 27c8beba25..a6f0104396 100644 --- a/synapse/events/validator.py +++ b/synapse/events/validator.py @@ -45,7 +45,7 @@ class EventValidator: """ self.validate_builder(event) - if event.format_version == EventFormatVersions.V1: + if event.format_version == EventFormatVersions.ROOM_V1_V2: EventID.from_string(event.event_id) required = [ diff --git a/synapse/federation/federation_base.py b/synapse/federation/federation_base.py index 4269a98db2..abe2c1971a 100644 --- a/synapse/federation/federation_base.py +++ b/synapse/federation/federation_base.py @@ -194,7 +194,7 @@ async def _check_sigs_on_pdu( # event id's domain (normally only the case for joins/leaves), and add additional # checks. Only do this if the room version has a concept of event ID domain # (ie, the room version uses old-style non-hash event IDs). - if room_version.event_format == EventFormatVersions.V1: + if room_version.event_format == EventFormatVersions.ROOM_V1_V2: event_domain = get_domain_from_id(pdu.event_id) if event_domain != sender_domain: try: diff --git a/synapse/federation/federation_client.py b/synapse/federation/federation_client.py index 7ee2974bb1..4a4289ee7c 100644 --- a/synapse/federation/federation_client.py +++ b/synapse/federation/federation_client.py @@ -1190,7 +1190,7 @@ class FederationClient(FederationBase): # Otherwise, consider it a legitimate error and raise. err = e.to_synapse_error() if self._is_unknown_endpoint(e, err): - if room_version.event_format != EventFormatVersions.V1: + if room_version.event_format != EventFormatVersions.ROOM_V1_V2: raise SynapseError( 400, "User's homeserver does not support this room version", diff --git a/synapse/storage/databases/main/event_federation.py b/synapse/storage/databases/main/event_federation.py index c836078da6..e687f87eca 100644 --- a/synapse/storage/databases/main/event_federation.py +++ b/synapse/storage/databases/main/event_federation.py @@ -1606,7 +1606,7 @@ class EventFederationWorkerStore(SignatureWorkerStore, EventsWorkerStore, SQLBas logger.info("Invalid prev_events for %s", event_id) continue - if room_version.event_format == EventFormatVersions.V1: + if room_version.event_format == EventFormatVersions.ROOM_V1_V2: for prev_event_tuple in prev_events: if ( not isinstance(prev_event_tuple, list) diff --git a/synapse/storage/databases/main/events_worker.py b/synapse/storage/databases/main/events_worker.py index 9b997c304d..84f17a9945 100644 --- a/synapse/storage/databases/main/events_worker.py +++ b/synapse/storage/databases/main/events_worker.py @@ -1156,7 +1156,7 @@ class EventsWorkerStore(SQLBaseStore): if format_version is None: # This means that we stored the event before we had the concept # of a event format version, so it must be a V1 event. 
- format_version = EventFormatVersions.V1 + format_version = EventFormatVersions.ROOM_V1_V2 room_version_id = row.room_version_id @@ -1186,10 +1186,10 @@ class EventsWorkerStore(SQLBaseStore): # # So, the following approximations should be adequate. - if format_version == EventFormatVersions.V1: + if format_version == EventFormatVersions.ROOM_V1_V2: # if it's event format v1 then it must be room v1 or v2 room_version = RoomVersions.V1 - elif format_version == EventFormatVersions.V2: + elif format_version == EventFormatVersions.ROOM_V3: # if it's event format v2 then it must be room v3 room_version = RoomVersions.V3 else: diff --git a/tests/storage/databases/main/test_events_worker.py b/tests/storage/databases/main/test_events_worker.py index 46d829b062..67401272ac 100644 --- a/tests/storage/databases/main/test_events_worker.py +++ b/tests/storage/databases/main/test_events_worker.py @@ -254,7 +254,7 @@ class DatabaseOutageTestCase(unittest.HomeserverTestCase): "room_id": self.room_id, "json": json.dumps(event_json), "internal_metadata": "{}", - "format_version": EventFormatVersions.V3, + "format_version": EventFormatVersions.ROOM_V4_PLUS, }, ) ) diff --git a/tests/storage/test_event_federation.py b/tests/storage/test_event_federation.py index d92a9ac5b7..a6679e1312 100644 --- a/tests/storage/test_event_federation.py +++ b/tests/storage/test_event_federation.py @@ -513,7 +513,7 @@ class EventFederationWorkerStoreTestCase(tests.unittest.HomeserverTestCase): def prev_event_format(prev_event_id: str) -> Union[Tuple[str, dict], str]: """Account for differences in prev_events format across room versions""" - if room_version.event_format == EventFormatVersions.V1: + if room_version.event_format == EventFormatVersions.ROOM_V1_V2: return prev_event_id, {} return prev_event_id diff --git a/tests/test_event_auth.py b/tests/test_event_auth.py index e42d7b9ba0..f4d9fba0a1 100644 --- a/tests/test_event_auth.py +++ b/tests/test_event_auth.py @@ -821,7 +821,7 @@ def _alias_event(room_version: RoomVersion, sender: str, **kwargs) -> EventBase: def _build_auth_dict_for_room_version( room_version: RoomVersion, auth_events: Iterable[EventBase] ) -> List: - if room_version.event_format == EventFormatVersions.V1: + if room_version.event_format == EventFormatVersions.ROOM_V1_V2: return [(e.event_id, "not_used") for e in auth_events] else: return [e.event_id for e in auth_events] @@ -871,7 +871,7 @@ event_count = 0 def _maybe_get_event_id_dict_for_room_version(room_version: RoomVersion) -> dict: """If this room version needs it, generate an event id""" - if room_version.event_format != EventFormatVersions.V1: + if room_version.event_format != EventFormatVersions.ROOM_V1_V2: return {} global event_count -- cgit 1.5.1 From d3d9ca156e323fe194b1bcb1af1628f65a2f3c1c Mon Sep 17 00:00:00 2001 From: reivilibre Date: Wed, 7 Sep 2022 11:03:32 +0000 Subject: Cancel the processing of key query requests when they time out. 
(#13680) --- changelog.d/13680.feature | 1 + synapse/api/auth.py | 5 +++ synapse/handlers/device.py | 3 ++ synapse/handlers/e2e_keys.py | 40 +++++++++++++--------- synapse/rest/client/keys.py | 6 ++-- synapse/storage/controllers/state.py | 4 +++ synapse/storage/databases/main/devices.py | 4 +++ synapse/storage/databases/main/end_to_end_keys.py | 5 ++- synapse/storage/databases/main/event_federation.py | 2 ++ synapse/storage/databases/main/events_worker.py | 4 +++ synapse/storage/databases/main/roommember.py | 2 ++ synapse/storage/databases/main/state.py | 2 ++ synapse/storage/databases/main/stream.py | 2 ++ synapse/storage/databases/state/store.py | 3 ++ .../storage/util/partial_state_events_tracker.py | 3 ++ synapse/types.py | 5 +++ tests/http/server/_base.py | 10 +++++- tests/rest/client/test_keys.py | 29 ++++++++++++++++ 18 files changed, 110 insertions(+), 20 deletions(-) create mode 100644 changelog.d/13680.feature (limited to 'synapse') diff --git a/changelog.d/13680.feature b/changelog.d/13680.feature new file mode 100644 index 0000000000..4234c7e082 --- /dev/null +++ b/changelog.d/13680.feature @@ -0,0 +1 @@ +Cancel the processing of key query requests when they time out. \ No newline at end of file diff --git a/synapse/api/auth.py b/synapse/api/auth.py index 9a1aea083f..8e54ef84b2 100644 --- a/synapse/api/auth.py +++ b/synapse/api/auth.py @@ -38,6 +38,7 @@ from synapse.logging.opentracing import ( trace, ) from synapse.types import Requester, create_requester +from synapse.util.cancellation import cancellable if TYPE_CHECKING: from synapse.server import HomeServer @@ -118,6 +119,7 @@ class Auth: errcode=Codes.NOT_JOINED, ) + @cancellable async def get_user_by_req( self, request: SynapseRequest, @@ -166,6 +168,7 @@ class Auth: parent_span.set_tag("appservice_id", requester.app_service.id) return requester + @cancellable async def _wrapped_get_user_by_req( self, request: SynapseRequest, @@ -281,6 +284,7 @@ class Auth: 403, "Application service has not registered this user (%s)" % user_id ) + @cancellable async def _get_appservice_user(self, request: Request) -> Optional[Requester]: """ Given a request, reads the request parameters to determine: @@ -523,6 +527,7 @@ class Auth: return bool(query_params) or bool(auth_headers) @staticmethod + @cancellable def get_access_token_from_request(request: Request) -> str: """Extracts the access_token from the request. 
diff --git a/synapse/handlers/device.py b/synapse/handlers/device.py index 9c2c3a0e68..c5ac169644 100644 --- a/synapse/handlers/device.py +++ b/synapse/handlers/device.py @@ -52,6 +52,7 @@ from synapse.types import ( from synapse.util import stringutils from synapse.util.async_helpers import Linearizer from synapse.util.caches.expiringcache import ExpiringCache +from synapse.util.cancellation import cancellable from synapse.util.metrics import measure_func from synapse.util.retryutils import NotRetryingDestination @@ -124,6 +125,7 @@ class DeviceWorkerHandler: return device + @cancellable async def get_device_changes_in_shared_rooms( self, user_id: str, room_ids: Collection[str], from_token: StreamToken ) -> Collection[str]: @@ -163,6 +165,7 @@ class DeviceWorkerHandler: @trace @measure_func("device.get_user_ids_changed") + @cancellable async def get_user_ids_changed( self, user_id: str, from_token: StreamToken ) -> JsonDict: diff --git a/synapse/handlers/e2e_keys.py b/synapse/handlers/e2e_keys.py index c938339ddd..ec81639c78 100644 --- a/synapse/handlers/e2e_keys.py +++ b/synapse/handlers/e2e_keys.py @@ -37,7 +37,8 @@ from synapse.types import ( get_verify_key_from_cross_signing_key, ) from synapse.util import json_decoder, unwrapFirstError -from synapse.util.async_helpers import Linearizer +from synapse.util.async_helpers import Linearizer, delay_cancellation +from synapse.util.cancellation import cancellable from synapse.util.retryutils import NotRetryingDestination if TYPE_CHECKING: @@ -91,6 +92,7 @@ class E2eKeysHandler: ) @trace + @cancellable async def query_devices( self, query_body: JsonDict, @@ -208,22 +210,26 @@ class E2eKeysHandler: r[user_id] = remote_queries[user_id] # Now fetch any devices that we don't have in our cache + # TODO It might make sense to propagate cancellations into the + # deferreds which are querying remote homeservers. 
await make_deferred_yieldable( - defer.gatherResults( - [ - run_in_background( - self._query_devices_for_destination, - results, - cross_signing_keys, - failures, - destination, - queries, - timeout, - ) - for destination, queries in remote_queries_not_in_cache.items() - ], - consumeErrors=True, - ).addErrback(unwrapFirstError) + delay_cancellation( + defer.gatherResults( + [ + run_in_background( + self._query_devices_for_destination, + results, + cross_signing_keys, + failures, + destination, + queries, + timeout, + ) + for destination, queries in remote_queries_not_in_cache.items() + ], + consumeErrors=True, + ).addErrback(unwrapFirstError) + ) ) ret = {"device_keys": results, "failures": failures} @@ -347,6 +353,7 @@ class E2eKeysHandler: return + @cancellable async def get_cross_signing_keys_from_cache( self, query: Iterable[str], from_user_id: Optional[str] ) -> Dict[str, Dict[str, dict]]: @@ -393,6 +400,7 @@ class E2eKeysHandler: } @trace + @cancellable async def query_local_devices( self, query: Mapping[str, Optional[List[str]]] ) -> Dict[str, Dict[str, dict]]: diff --git a/synapse/rest/client/keys.py b/synapse/rest/client/keys.py index a395694fa5..f653d2a3e1 100644 --- a/synapse/rest/client/keys.py +++ b/synapse/rest/client/keys.py @@ -27,9 +27,9 @@ from synapse.http.servlet import ( ) from synapse.http.site import SynapseRequest from synapse.logging.opentracing import log_kv, set_tag +from synapse.rest.client._base import client_patterns, interactive_auth_handler from synapse.types import JsonDict, StreamToken - -from ._base import client_patterns, interactive_auth_handler +from synapse.util.cancellation import cancellable if TYPE_CHECKING: from synapse.server import HomeServer @@ -156,6 +156,7 @@ class KeyQueryServlet(RestServlet): self.auth = hs.get_auth() self.e2e_keys_handler = hs.get_e2e_keys_handler() + @cancellable async def on_POST(self, request: SynapseRequest) -> Tuple[int, JsonDict]: requester = await self.auth.get_user_by_req(request, allow_guest=True) user_id = requester.user.to_string() @@ -199,6 +200,7 @@ class KeyChangesServlet(RestServlet): self.device_handler = hs.get_device_handler() self.store = hs.get_datastores().main + @cancellable async def on_GET(self, request: SynapseRequest) -> Tuple[int, JsonDict]: requester = await self.auth.get_user_by_req(request, allow_guest=True) diff --git a/synapse/storage/controllers/state.py b/synapse/storage/controllers/state.py index ba5380ce3e..bbe568bf05 100644 --- a/synapse/storage/controllers/state.py +++ b/synapse/storage/controllers/state.py @@ -36,6 +36,7 @@ from synapse.storage.util.partial_state_events_tracker import ( PartialStateEventsTracker, ) from synapse.types import MutableStateMap, StateMap +from synapse.util.cancellation import cancellable if TYPE_CHECKING: from synapse.server import HomeServer @@ -229,6 +230,7 @@ class StateStorageController: @trace @tag_args + @cancellable async def get_state_ids_for_events( self, event_ids: Collection[str], @@ -350,6 +352,7 @@ class StateStorageController: @trace @tag_args + @cancellable async def get_state_group_for_events( self, event_ids: Collection[str], @@ -398,6 +401,7 @@ class StateStorageController: event_id, room_id, prev_group, delta_ids, current_state_ids ) + @cancellable async def get_current_state_ids( self, room_id: str, diff --git a/synapse/storage/databases/main/devices.py b/synapse/storage/databases/main/devices.py index ca0fe8c4be..5d700ca6c3 100644 --- a/synapse/storage/databases/main/devices.py +++ b/synapse/storage/databases/main/devices.py @@ -53,6 
+53,7 @@ from synapse.util import json_decoder, json_encoder from synapse.util.caches.descriptors import cached, cachedList from synapse.util.caches.lrucache import LruCache from synapse.util.caches.stream_change_cache import StreamChangeCache +from synapse.util.cancellation import cancellable from synapse.util.iterutils import batch_iter from synapse.util.stringutils import shortstr @@ -668,6 +669,7 @@ class DeviceWorkerStore(EndToEndKeyWorkerStore): ... @trace + @cancellable async def get_user_devices_from_cache( self, query_list: List[Tuple[str, Optional[str]]] ) -> Tuple[Set[str], Dict[str, Dict[str, JsonDict]]]: @@ -743,6 +745,7 @@ class DeviceWorkerStore(EndToEndKeyWorkerStore): return self._device_list_stream_cache.get_all_entities_changed(from_key) + @cancellable async def get_users_whose_devices_changed( self, from_key: int, @@ -1221,6 +1224,7 @@ class DeviceWorkerStore(EndToEndKeyWorkerStore): desc="get_min_device_lists_changes_in_room", ) + @cancellable async def get_device_list_changes_in_rooms( self, room_ids: Collection[str], from_id: int ) -> Optional[Set[str]]: diff --git a/synapse/storage/databases/main/end_to_end_keys.py b/synapse/storage/databases/main/end_to_end_keys.py index 46c0d06157..8e9e1b0b4b 100644 --- a/synapse/storage/databases/main/end_to_end_keys.py +++ b/synapse/storage/databases/main/end_to_end_keys.py @@ -50,6 +50,7 @@ from synapse.storage.util.id_generators import StreamIdGenerator from synapse.types import JsonDict from synapse.util import json_encoder from synapse.util.caches.descriptors import cached, cachedList +from synapse.util.cancellation import cancellable from synapse.util.iterutils import batch_iter if TYPE_CHECKING: @@ -135,6 +136,7 @@ class EndToEndKeyWorkerStore(EndToEndKeyBackgroundStore, CacheInvalidationWorker return now_stream_id, [] @trace + @cancellable async def get_e2e_device_keys_for_cs_api( self, query_list: List[Tuple[str, Optional[str]]] ) -> Dict[str, Dict[str, JsonDict]]: @@ -197,6 +199,7 @@ class EndToEndKeyWorkerStore(EndToEndKeyBackgroundStore, CacheInvalidationWorker ... @trace + @cancellable async def get_e2e_device_keys_and_signatures( self, query_list: Collection[Tuple[str, Optional[str]]], @@ -887,6 +890,7 @@ class EndToEndKeyWorkerStore(EndToEndKeyBackgroundStore, CacheInvalidationWorker return keys + @cancellable async def get_e2e_cross_signing_keys_bulk( self, user_ids: List[str], from_user_id: Optional[str] = None ) -> Dict[str, Optional[Dict[str, JsonDict]]]: @@ -902,7 +906,6 @@ class EndToEndKeyWorkerStore(EndToEndKeyBackgroundStore, CacheInvalidationWorker keys were not found, either their user ID will not be in the dict, or their user ID will map to None. 
""" - result = await self._get_bare_e2e_cross_signing_keys_bulk(user_ids) if from_user_id: diff --git a/synapse/storage/databases/main/event_federation.py b/synapse/storage/databases/main/event_federation.py index e687f87eca..ca47a22bf1 100644 --- a/synapse/storage/databases/main/event_federation.py +++ b/synapse/storage/databases/main/event_federation.py @@ -48,6 +48,7 @@ from synapse.types import JsonDict from synapse.util import json_encoder from synapse.util.caches.descriptors import cached from synapse.util.caches.lrucache import LruCache +from synapse.util.cancellation import cancellable from synapse.util.iterutils import batch_iter if TYPE_CHECKING: @@ -976,6 +977,7 @@ class EventFederationWorkerStore(SignatureWorkerStore, EventsWorkerStore, SQLBas return int(min_depth) if min_depth is not None else None + @cancellable async def get_forward_extremities_for_room_at_stream_ordering( self, room_id: str, stream_ordering: int ) -> List[str]: diff --git a/synapse/storage/databases/main/events_worker.py b/synapse/storage/databases/main/events_worker.py index 84f17a9945..52914febf9 100644 --- a/synapse/storage/databases/main/events_worker.py +++ b/synapse/storage/databases/main/events_worker.py @@ -81,6 +81,7 @@ from synapse.util import unwrapFirstError from synapse.util.async_helpers import ObservableDeferred, delay_cancellation from synapse.util.caches.descriptors import cached, cachedList from synapse.util.caches.lrucache import AsyncLruCache +from synapse.util.cancellation import cancellable from synapse.util.iterutils import batch_iter from synapse.util.metrics import Measure @@ -339,6 +340,7 @@ class EventsWorkerStore(SQLBaseStore): ) -> Optional[EventBase]: ... + @cancellable async def get_event( self, event_id: str, @@ -433,6 +435,7 @@ class EventsWorkerStore(SQLBaseStore): @trace @tag_args + @cancellable async def get_events_as_list( self, event_ids: Collection[str], @@ -584,6 +587,7 @@ class EventsWorkerStore(SQLBaseStore): return events + @cancellable async def _get_events_from_cache_or_db( self, event_ids: Iterable[str], allow_rejected: bool = False ) -> Dict[str, EventCacheEntry]: diff --git a/synapse/storage/databases/main/roommember.py b/synapse/storage/databases/main/roommember.py index 4f0adb136a..a77e49dc66 100644 --- a/synapse/storage/databases/main/roommember.py +++ b/synapse/storage/databases/main/roommember.py @@ -55,6 +55,7 @@ from synapse.types import JsonDict, PersistedEventPosition, StateMap, get_domain from synapse.util.async_helpers import Linearizer from synapse.util.caches import intern_string from synapse.util.caches.descriptors import _CacheContext, cached, cachedList +from synapse.util.cancellation import cancellable from synapse.util.iterutils import batch_iter from synapse.util.metrics import Measure @@ -770,6 +771,7 @@ class RoomMemberWorkerStore(EventsWorkerStore): _get_users_server_still_shares_room_with_txn, ) + @cancellable async def get_rooms_for_user( self, user_id: str, on_invalidate: Optional[Callable[[], None]] = None ) -> FrozenSet[str]: diff --git a/synapse/storage/databases/main/state.py b/synapse/storage/databases/main/state.py index 0b10af0e58..e607ccfdc9 100644 --- a/synapse/storage/databases/main/state.py +++ b/synapse/storage/databases/main/state.py @@ -36,6 +36,7 @@ from synapse.storage.state import StateFilter from synapse.types import JsonDict, JsonMapping, StateMap from synapse.util.caches import intern_string from synapse.util.caches.descriptors import cached, cachedList +from synapse.util.cancellation import cancellable from 
synapse.util.iterutils import batch_iter if TYPE_CHECKING: @@ -281,6 +282,7 @@ class StateGroupWorkerStore(EventsWorkerStore, SQLBaseStore): ) # FIXME: how should this be cached? + @cancellable async def get_partial_filtered_current_state_ids( self, room_id: str, state_filter: Optional[StateFilter] = None ) -> StateMap[str]: diff --git a/synapse/storage/databases/main/stream.py b/synapse/storage/databases/main/stream.py index a347430aa7..3f9bfaeac5 100644 --- a/synapse/storage/databases/main/stream.py +++ b/synapse/storage/databases/main/stream.py @@ -72,6 +72,7 @@ from synapse.storage.util.id_generators import MultiWriterIdGenerator from synapse.types import PersistedEventPosition, RoomStreamToken from synapse.util.caches.descriptors import cached from synapse.util.caches.stream_change_cache import StreamChangeCache +from synapse.util.cancellation import cancellable if TYPE_CHECKING: from synapse.server import HomeServer @@ -597,6 +598,7 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): return ret, key + @cancellable async def get_membership_changes_for_user( self, user_id: str, diff --git a/synapse/storage/databases/state/store.py b/synapse/storage/databases/state/store.py index bb64543c1f..f8cfcaca83 100644 --- a/synapse/storage/databases/state/store.py +++ b/synapse/storage/databases/state/store.py @@ -31,6 +31,7 @@ from synapse.storage.util.sequence import build_sequence_generator from synapse.types import MutableStateMap, StateKey, StateMap from synapse.util.caches.descriptors import cached from synapse.util.caches.dictionary_cache import DictionaryCache +from synapse.util.cancellation import cancellable if TYPE_CHECKING: from synapse.server import HomeServer @@ -156,6 +157,7 @@ class StateGroupDataStore(StateBackgroundUpdateStore, SQLBaseStore): "get_state_group_delta", _get_state_group_delta_txn ) + @cancellable async def _get_state_groups_from_groups( self, groups: List[int], state_filter: StateFilter ) -> Dict[int, StateMap[str]]: @@ -235,6 +237,7 @@ class StateGroupDataStore(StateBackgroundUpdateStore, SQLBaseStore): return state_filter.filter_state(state_dict_ids), not missing_types + @cancellable async def _get_state_for_groups( self, groups: Iterable[int], state_filter: Optional[StateFilter] = None ) -> Dict[int, MutableStateMap[str]]: diff --git a/synapse/storage/util/partial_state_events_tracker.py b/synapse/storage/util/partial_state_events_tracker.py index b4bf49dace..8d8894d1d5 100644 --- a/synapse/storage/util/partial_state_events_tracker.py +++ b/synapse/storage/util/partial_state_events_tracker.py @@ -24,6 +24,7 @@ from synapse.logging.opentracing import trace_with_opname from synapse.storage.databases.main.events_worker import EventsWorkerStore from synapse.storage.databases.main.room import RoomWorkerStore from synapse.util import unwrapFirstError +from synapse.util.cancellation import cancellable logger = logging.getLogger(__name__) @@ -60,6 +61,7 @@ class PartialStateEventsTracker: o.callback(None) @trace_with_opname("PartialStateEventsTracker.await_full_state") + @cancellable async def await_full_state(self, event_ids: Collection[str]) -> None: """Wait for all the given events to have full state. @@ -154,6 +156,7 @@ class PartialCurrentStateTracker: o.callback(None) @trace_with_opname("PartialCurrentStateTracker.await_full_state") + @cancellable async def await_full_state(self, room_id: str) -> None: # We add the deferred immediately so that the DB call to check for # partial state doesn't race when we unpartial the room. 
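The `@cancellable` annotations added throughout these storage methods are essentially lightweight markers rather than wrappers: the request-dispatch code checks the attribute to decide whether a client disconnect may propagate a `CancelledError` into the coroutine. A minimal sketch of such a marker decorator and its use — an assumed shape for illustration, the real `synapse.util.cancellation` module may differ in detail:

```python
from typing import Any, Callable, TypeVar

F = TypeVar("F", bound=Callable[..., Any])


def cancellable(function: F) -> F:
    """Mark an async function as safe to cancel if the calling request disconnects.

    The decorator only sets an attribute; the code that dispatches requests is
    expected to inspect it before letting a CancelledError reach the coroutine.
    """
    function.cancellable = True  # type: ignore[attr-defined]
    return function


@cancellable
async def get_rooms_for_user(user_id: str) -> frozenset:
    ...  # storage lookup elided; hypothetical usage only
```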
diff --git a/synapse/types.py b/synapse/types.py index 668d48d646..ec44601f54 100644 --- a/synapse/types.py +++ b/synapse/types.py @@ -52,6 +52,7 @@ from twisted.internet.interfaces import ( ) from synapse.api.errors import Codes, SynapseError +from synapse.util.cancellation import cancellable from synapse.util.stringutils import parse_and_validate_server_name if TYPE_CHECKING: @@ -699,7 +700,11 @@ class StreamToken: START: ClassVar["StreamToken"] @classmethod + @cancellable async def from_string(cls, store: "DataStore", string: str) -> "StreamToken": + """ + Creates a RoomStreamToken from its textual representation. + """ try: keys = string.split(cls._SEPARATOR) while len(keys) < len(attr.fields(cls)): diff --git a/tests/http/server/_base.py b/tests/http/server/_base.py index 5726e60cee..5071f83574 100644 --- a/tests/http/server/_base.py +++ b/tests/http/server/_base.py @@ -140,6 +140,8 @@ def make_request_with_cancellation_test( method: str, path: str, content: Union[bytes, str, JsonDict] = b"", + *, + token: Optional[str] = None, ) -> FakeChannel: """Performs a request repeatedly, disconnecting at successive `await`s, until one completes. @@ -211,7 +213,13 @@ def make_request_with_cancellation_test( with deferred_patch.patch(): # Start the request. channel = make_request( - reactor, site, method, path, content, await_result=False + reactor, + site, + method, + path, + content, + await_result=False, + access_token=token, ) request = channel.request diff --git a/tests/rest/client/test_keys.py b/tests/rest/client/test_keys.py index bbc8e74243..741fecea77 100644 --- a/tests/rest/client/test_keys.py +++ b/tests/rest/client/test_keys.py @@ -19,6 +19,7 @@ from synapse.rest import admin from synapse.rest.client import keys, login from tests import unittest +from tests.http.server._base import make_request_with_cancellation_test class KeyQueryTestCase(unittest.HomeserverTestCase): @@ -89,3 +90,31 @@ class KeyQueryTestCase(unittest.HomeserverTestCase): Codes.BAD_JSON, channel.result, ) + + def test_key_query_cancellation(self) -> None: + """ + Tests that /keys/query is cancellable and does not swallow the + CancelledError. 
+ """ + self.register_user("alice", "wonderland") + alice_token = self.login("alice", "wonderland") + + bob = self.register_user("bob", "uncle") + + channel = make_request_with_cancellation_test( + "test_key_query_cancellation", + self.reactor, + self.site, + "POST", + "/_matrix/client/r0/keys/query", + { + "device_keys": { + # Empty list means we request keys for all bob's devices + bob: [], + }, + }, + token=alice_token, + ) + + self.assertEqual(200, channel.code, msg=channel.result["body"]) + self.assertIn(bob, channel.json_body["device_keys"]) -- cgit 1.5.1 From b58386e37e30e920332e4b04011b528a66a39fad Mon Sep 17 00:00:00 2001 From: David Robertson Date: Wed, 7 Sep 2022 12:16:10 +0100 Subject: A second batch of Pydantic models for rest/client/account.py (#13687) --- changelog.d/13687.feature | 1 + synapse/http/servlet.py | 19 +++++++++++++-- synapse/rest/client/account.py | 54 ++++++++++++++++++++---------------------- synapse/rest/client/models.py | 24 +++++++++++++++---- 4 files changed, 64 insertions(+), 34 deletions(-) create mode 100644 changelog.d/13687.feature (limited to 'synapse') diff --git a/changelog.d/13687.feature b/changelog.d/13687.feature new file mode 100644 index 0000000000..dac53ec122 --- /dev/null +++ b/changelog.d/13687.feature @@ -0,0 +1 @@ +Improve validation of request bodies for the following client-server API endpoints: [`/account/3pid/msisdn/requestToken`](https://spec.matrix.org/v1.3/client-server-api/#post_matrixclientv3account3pidmsisdnrequesttoken) and [`/org.matrix.msc3720/account_status`](https://github.com/matrix-org/matrix-spec-proposals/blob/babolivier/user_status/proposals/3720-account-status.md#post-_matrixclientv1account_status). \ No newline at end of file diff --git a/synapse/http/servlet.py b/synapse/http/servlet.py index 26aaabfb34..80acbdcf3c 100644 --- a/synapse/http/servlet.py +++ b/synapse/http/servlet.py @@ -28,7 +28,8 @@ from typing import ( overload, ) -from pydantic import BaseModel, ValidationError +from pydantic import BaseModel, MissingError, PydanticValueError, ValidationError +from pydantic.error_wrappers import ErrorWrapper from typing_extensions import Literal from twisted.web.server import Request @@ -714,7 +715,21 @@ def parse_and_validate_json_object_from_request( try: instance = model_type.parse_obj(content) except ValidationError as e: - raise SynapseError(HTTPStatus.BAD_REQUEST, str(e), errcode=Codes.BAD_JSON) + # Choose a matrix error code. The catch-all is BAD_JSON, but we try to find a + # more specific error if possible (which occasionally helps us to be spec- + # compliant) This is a bit awkward because the spec's error codes aren't very + # clear-cut: BAD_JSON arguably overlaps with MISSING_PARAM and INVALID_PARAM. + errcode = Codes.BAD_JSON + + raw_errors = e.raw_errors + if len(raw_errors) == 1 and isinstance(raw_errors[0], ErrorWrapper): + raw_error = raw_errors[0].exc + if isinstance(raw_error, MissingError): + errcode = Codes.MISSING_PARAM + elif isinstance(raw_error, PydanticValueError): + errcode = Codes.INVALID_PARAM + + raise SynapseError(HTTPStatus.BAD_REQUEST, str(e), errcode=errcode) return instance diff --git a/synapse/rest/client/account.py b/synapse/rest/client/account.py index 1f9a8ccc23..a09aaf3448 100644 --- a/synapse/rest/client/account.py +++ b/synapse/rest/client/account.py @@ -15,7 +15,7 @@ # limitations under the License. 
import logging import random -from typing import TYPE_CHECKING, Optional, Tuple +from typing import TYPE_CHECKING, List, Optional, Tuple from urllib.parse import urlparse from pydantic import StrictBool, StrictStr, constr @@ -41,7 +41,11 @@ from synapse.http.servlet import ( from synapse.http.site import SynapseRequest from synapse.metrics import threepid_send_requests from synapse.push.mailer import Mailer -from synapse.rest.client.models import AuthenticationData, EmailRequestTokenBody +from synapse.rest.client.models import ( + AuthenticationData, + EmailRequestTokenBody, + MsisdnRequestTokenBody, +) from synapse.rest.models import RequestBodyModel from synapse.types import JsonDict from synapse.util.msisdn import phone_number_to_msisdn @@ -400,23 +404,16 @@ class MsisdnThreepidRequestTokenRestServlet(RestServlet): self.identity_handler = hs.get_identity_handler() async def on_POST(self, request: SynapseRequest) -> Tuple[int, JsonDict]: - body = parse_json_object_from_request(request) - assert_params_in_dict( - body, ["client_secret", "country", "phone_number", "send_attempt"] + body = parse_and_validate_json_object_from_request( + request, MsisdnRequestTokenBody ) - client_secret = body["client_secret"] - assert_valid_client_secret(client_secret) - - country = body["country"] - phone_number = body["phone_number"] - send_attempt = body["send_attempt"] - next_link = body.get("next_link") # Optional param - - msisdn = phone_number_to_msisdn(country, phone_number) + msisdn = phone_number_to_msisdn(body.country, body.phone_number) if not await check_3pid_allowed(self.hs, "msisdn", msisdn): raise SynapseError( 403, + # TODO: is this error message accurate? Looks like we've only rejected + # this phone number, not necessarily all phone numbers "Account phone numbers are not authorized on this server", Codes.THREEPID_DENIED, ) @@ -425,9 +422,9 @@ class MsisdnThreepidRequestTokenRestServlet(RestServlet): request, "msisdn", msisdn ) - if next_link: + if body.next_link: # Raise if the provided next_link value isn't valid - assert_valid_next_link(self.hs, next_link) + assert_valid_next_link(self.hs, body.next_link) existing_user_id = await self.store.get_user_id_by_threepid("msisdn", msisdn) @@ -454,15 +451,15 @@ class MsisdnThreepidRequestTokenRestServlet(RestServlet): ret = await self.identity_handler.requestMsisdnToken( self.hs.config.registration.account_threepid_delegate_msisdn, - country, - phone_number, - client_secret, - send_attempt, - next_link, + body.country, + body.phone_number, + body.client_secret, + body.send_attempt, + body.next_link, ) threepid_send_requests.labels(type="msisdn", reason="add_threepid").observe( - send_attempt + body.send_attempt ) return 200, ret @@ -845,17 +842,18 @@ class AccountStatusRestServlet(RestServlet): self._auth = hs.get_auth() self._account_handler = hs.get_account_handler() + class PostBody(RequestBodyModel): + # TODO: we could validate that each user id is an mxid here, and/or parse it + # as a UserID + user_ids: List[StrictStr] + async def on_POST(self, request: SynapseRequest) -> Tuple[int, JsonDict]: await self._auth.get_user_by_req(request) - body = parse_json_object_from_request(request) - if "user_ids" not in body: - raise SynapseError( - 400, "Required parameter 'user_ids' is missing", Codes.MISSING_PARAM - ) + body = parse_and_validate_json_object_from_request(request, self.PostBody) statuses, failures = await self._account_handler.get_account_statuses( - body["user_ids"], + body.user_ids, allow_remote=True, ) diff --git 
a/synapse/rest/client/models.py b/synapse/rest/client/models.py index 3150602997..6278450c70 100644 --- a/synapse/rest/client/models.py +++ b/synapse/rest/client/models.py @@ -25,8 +25,8 @@ class AuthenticationData(RequestBodyModel): (The name "Authentication Data" is taken directly from the spec.) - Additional keys will be present, depending on the `type` field. Use `.dict()` to - access them. + Additional keys will be present, depending on the `type` field. Use + `.dict(exclude_unset=True)` to access them. """ class Config: @@ -36,7 +36,7 @@ class AuthenticationData(RequestBodyModel): type: Optional[StrictStr] = None -class EmailRequestTokenBody(RequestBodyModel): +class ThreePidRequestTokenBody(RequestBodyModel): if TYPE_CHECKING: client_secret: StrictStr else: @@ -47,7 +47,7 @@ class EmailRequestTokenBody(RequestBodyModel): max_length=255, strict=True, ) - email: StrictStr + id_server: Optional[StrictStr] id_access_token: Optional[StrictStr] next_link: Optional[StrictStr] @@ -61,9 +61,25 @@ class EmailRequestTokenBody(RequestBodyModel): raise ValueError("id_access_token is required if an id_server is supplied.") return token + +class EmailRequestTokenBody(ThreePidRequestTokenBody): + email: StrictStr + # Canonicalise the email address. The addresses are all stored canonicalised # in the database. This allows the user to reset his password without having to # know the exact spelling (eg. upper and lower case) of address in the database. # Without this, an email stored in the database as "foo@bar.com" would cause # user requests for "FOO@bar.com" to raise a Not Found error. _email_validator = validator("email", allow_reuse=True)(validate_email) + + +if TYPE_CHECKING: + ISO3116_1_Alpha_2 = StrictStr +else: + # Per spec: two-letter uppercase ISO-3166-1-alpha-2 + ISO3116_1_Alpha_2 = constr(regex="[A-Z]{2}", strict=True) + + +class MsisdnRequestTokenBody(ThreePidRequestTokenBody): + country: ISO3116_1_Alpha_2 + phone_number: StrictStr -- cgit 1.5.1 From d4d3249ded000219a4f875943632c3d0f928d58d Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 7 Sep 2022 11:41:52 -0500 Subject: Instrument `get_metadata_for_events` for tracing (#13730) When backfilling, `_get_state_ids_after_missing_prev_event` calls [`get_metadata_for_events`](https://github.com/matrix-org/synapse/blob/26bc26586b4b95d63ce7e453e9312469843f796e/synapse/handlers/federation_event.py#L1133). For `#matrix:matrix.org`, it's called with 77k `state_events` which means 77 calls to the database and takes 28 seconds. --- changelog.d/13730.misc | 1 + synapse/storage/databases/main/state.py | 2 ++ 2 files changed, 3 insertions(+) create mode 100644 changelog.d/13730.misc (limited to 'synapse') diff --git a/changelog.d/13730.misc b/changelog.d/13730.misc new file mode 100644 index 0000000000..06da6581a4 --- /dev/null +++ b/changelog.d/13730.misc @@ -0,0 +1 @@ +Instrument `get_metadata_for_events` for understandable traces in Jaeger. 
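The `@trace` decorator applied in the following hunk records each call as a span, which is what makes a large backfill request (such as the 77k-event example in the commit message) visible in Jaeger. A rough sketch of how a decorator of this kind can be built on the opentracing API — illustrative only, and assuming the `opentracing` package; Synapse's own `synapse.logging.opentracing.trace` is more involved:

```python
import functools
from typing import Any, Awaitable, Callable, TypeVar

import opentracing  # assumed dependency for this sketch

R = TypeVar("R")


def trace(func: Callable[..., Awaitable[R]]) -> Callable[..., Awaitable[R]]:
    """Run the decorated coroutine inside a span named after the function."""

    @functools.wraps(func)
    async def wrapper(*args: Any, **kwargs: Any) -> R:
        # start_active_span() returns a context manager that finishes the span
        # when the coroutine completes (or raises).
        with opentracing.global_tracer().start_active_span(func.__name__):
            return await func(*args, **kwargs)

    return wrapper
```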
diff --git a/synapse/storage/databases/main/state.py b/synapse/storage/databases/main/state.py index e607ccfdc9..af7bebee80 100644 --- a/synapse/storage/databases/main/state.py +++ b/synapse/storage/databases/main/state.py @@ -23,6 +23,7 @@ from synapse.api.errors import NotFoundError, UnsupportedRoomVersionError from synapse.api.room_versions import KNOWN_ROOM_VERSIONS, RoomVersion from synapse.events import EventBase from synapse.events.snapshot import EventContext +from synapse.logging.opentracing import trace from synapse.storage._base import SQLBaseStore from synapse.storage.database import ( DatabasePool, @@ -143,6 +144,7 @@ class StateGroupWorkerStore(EventsWorkerStore, SQLBaseStore): return room_version + @trace async def get_metadata_for_events( self, event_ids: Collection[str] ) -> Dict[str, EventMetadata]: -- cgit 1.5.1 From b7e4bfd005a804ed0cd3d204c367b11565b76df6 Mon Sep 17 00:00:00 2001 From: reivilibre Date: Thu, 8 Sep 2022 10:18:03 +0000 Subject: Fix a bug where Synapse fails to start if a signing key file contains an empty line. (#13738) --- changelog.d/13738.bugfix | 1 + synapse/config/key.py | 13 ++++++++++++- 2 files changed, 13 insertions(+), 1 deletion(-) create mode 100644 changelog.d/13738.bugfix (limited to 'synapse') diff --git a/changelog.d/13738.bugfix b/changelog.d/13738.bugfix new file mode 100644 index 0000000000..d64fa0b4de --- /dev/null +++ b/changelog.d/13738.bugfix @@ -0,0 +1 @@ +Fix a bug where Synapse fails to start if a signing key file contains an empty line. \ No newline at end of file diff --git a/synapse/config/key.py b/synapse/config/key.py index cc75efdf8f..f3dc4df695 100644 --- a/synapse/config/key.py +++ b/synapse/config/key.py @@ -217,7 +217,18 @@ class KeyConfig(Config): signing_keys = self.read_file(signing_key_path, name) try: - return read_signing_keys(signing_keys.splitlines(True)) + loaded_signing_keys = read_signing_keys( + [ + signing_key_line + for signing_key_line in signing_keys.splitlines(keepends=False) + if signing_key_line.strip() + ] + ) + + if not loaded_signing_keys: + raise ConfigError(f"No signing keys in file {signing_key_path}") + + return loaded_signing_keys except Exception as e: raise ConfigError("Error reading %s: %s" % (name, str(e))) -- cgit 1.5.1 From 526f84bc2ea78a44ef612bf79caf5cfb7966c1ed Mon Sep 17 00:00:00 2001 From: reivilibre Date: Thu, 8 Sep 2022 14:01:42 +0000 Subject: Fix Prometheus recording rules to not use legacy metric names. (#13718) --- changelog.d/13718.misc | 1 + contrib/prometheus/synapse-v1.rules | 21 --------------------- contrib/prometheus/synapse-v2.rules | 22 +++++++++++----------- synapse/app/phone_stats_home.py | 6 +++--- synapse/federation/sender/__init__.py | 4 ++-- synapse/metrics/_legacy_exposition.py | 5 +++++ 6 files changed, 22 insertions(+), 37 deletions(-) create mode 100644 changelog.d/13718.misc delete mode 100644 contrib/prometheus/synapse-v1.rules (limited to 'synapse') diff --git a/changelog.d/13718.misc b/changelog.d/13718.misc new file mode 100644 index 0000000000..07ace50b12 --- /dev/null +++ b/changelog.d/13718.misc @@ -0,0 +1 @@ +Add experimental configuration option to allow disabling legacy Prometheus metric names. 
\ No newline at end of file diff --git a/contrib/prometheus/synapse-v1.rules b/contrib/prometheus/synapse-v1.rules deleted file mode 100644 index 4c900ba537..0000000000 --- a/contrib/prometheus/synapse-v1.rules +++ /dev/null @@ -1,21 +0,0 @@ -synapse_federation_transaction_queue_pendingEdus:total = sum(synapse_federation_transaction_queue_pendingEdus or absent(synapse_federation_transaction_queue_pendingEdus)*0) -synapse_federation_transaction_queue_pendingPdus:total = sum(synapse_federation_transaction_queue_pendingPdus or absent(synapse_federation_transaction_queue_pendingPdus)*0) - -synapse_http_server_request_count:method{servlet=""} = sum(synapse_http_server_request_count) by (method) -synapse_http_server_request_count:servlet{method=""} = sum(synapse_http_server_request_count) by (servlet) - -synapse_http_server_request_count:total{servlet=""} = sum(synapse_http_server_request_count:by_method) by (servlet) - -synapse_cache:hit_ratio_5m = rate(synapse_util_caches_cache:hits[5m]) / rate(synapse_util_caches_cache:total[5m]) -synapse_cache:hit_ratio_30s = rate(synapse_util_caches_cache:hits[30s]) / rate(synapse_util_caches_cache:total[30s]) - -synapse_federation_client_sent{type="EDU"} = synapse_federation_client_sent_edus + 0 -synapse_federation_client_sent{type="PDU"} = synapse_federation_client_sent_pdu_destinations:count + 0 -synapse_federation_client_sent{type="Query"} = sum(synapse_federation_client_sent_queries) by (job) - -synapse_federation_server_received{type="EDU"} = synapse_federation_server_received_edus + 0 -synapse_federation_server_received{type="PDU"} = synapse_federation_server_received_pdus + 0 -synapse_federation_server_received{type="Query"} = sum(synapse_federation_server_received_queries) by (job) - -synapse_federation_transaction_queue_pending{type="EDU"} = synapse_federation_transaction_queue_pending_edus + 0 -synapse_federation_transaction_queue_pending{type="PDU"} = synapse_federation_transaction_queue_pending_pdus + 0 diff --git a/contrib/prometheus/synapse-v2.rules b/contrib/prometheus/synapse-v2.rules index 7e405bf7f0..a5e6a735cd 100644 --- a/contrib/prometheus/synapse-v2.rules +++ b/contrib/prometheus/synapse-v2.rules @@ -20,18 +20,18 @@ groups: expr: 'sum(synapse_http_server_request_count:by_method) by (servlet)' - record: 'synapse_cache:hit_ratio_5m' - expr: 'rate(synapse_util_caches_cache:hits[5m]) / rate(synapse_util_caches_cache:total[5m])' + expr: 'rate(synapse_util_caches_cache_hits[5m]) / rate(synapse_util_caches_cache[5m])' - record: 'synapse_cache:hit_ratio_30s' - expr: 'rate(synapse_util_caches_cache:hits[30s]) / rate(synapse_util_caches_cache:total[30s])' + expr: 'rate(synapse_util_caches_cache_hits[30s]) / rate(synapse_util_caches_cache[30s])' - record: 'synapse_federation_client_sent' labels: type: "EDU" - expr: 'synapse_federation_client_sent_edus + 0' + expr: 'synapse_federation_client_sent_edus_total + 0' - record: 'synapse_federation_client_sent' labels: type: "PDU" - expr: 'synapse_federation_client_sent_pdu_destinations:count + 0' + expr: 'synapse_federation_client_sent_pdu_destinations_count_total + 0' - record: 'synapse_federation_client_sent' labels: type: "Query" @@ -40,11 +40,11 @@ groups: - record: 'synapse_federation_server_received' labels: type: "EDU" - expr: 'synapse_federation_server_received_edus + 0' + expr: 'synapse_federation_server_received_edus_total + 0' - record: 'synapse_federation_server_received' labels: type: "PDU" - expr: 'synapse_federation_server_received_pdus + 0' + expr: 
'synapse_federation_server_received_pdus_total + 0' - record: 'synapse_federation_server_received' labels: type: "Query" @@ -60,19 +60,19 @@ groups: expr: 'synapse_federation_transaction_queue_pending_pdus + 0' - record: synapse_storage_events_persisted_by_source_type - expr: sum without(type, origin_type, origin_entity) (synapse_storage_events_persisted_events_sep{origin_type="remote"}) + expr: sum without(type, origin_type, origin_entity) (synapse_storage_events_persisted_events_sep_total{origin_type="remote"}) labels: type: remote - record: synapse_storage_events_persisted_by_source_type - expr: sum without(type, origin_type, origin_entity) (synapse_storage_events_persisted_events_sep{origin_entity="*client*",origin_type="local"}) + expr: sum without(type, origin_type, origin_entity) (synapse_storage_events_persisted_events_sep_total{origin_entity="*client*",origin_type="local"}) labels: type: local - record: synapse_storage_events_persisted_by_source_type - expr: sum without(type, origin_type, origin_entity) (synapse_storage_events_persisted_events_sep{origin_entity!="*client*",origin_type="local"}) + expr: sum without(type, origin_type, origin_entity) (synapse_storage_events_persisted_events_sep_total{origin_entity!="*client*",origin_type="local"}) labels: type: bridges - record: synapse_storage_events_persisted_by_event_type - expr: sum without(origin_entity, origin_type) (synapse_storage_events_persisted_events_sep) + expr: sum without(origin_entity, origin_type) (synapse_storage_events_persisted_events_sep_total) - record: synapse_storage_events_persisted_by_origin - expr: sum without(type) (synapse_storage_events_persisted_events_sep) + expr: sum without(type) (synapse_storage_events_persisted_events_sep_total) diff --git a/synapse/app/phone_stats_home.py b/synapse/app/phone_stats_home.py index 51c8d15711..53db1e85b3 100644 --- a/synapse/app/phone_stats_home.py +++ b/synapse/app/phone_stats_home.py @@ -32,15 +32,15 @@ logger = logging.getLogger("synapse.app.homeserver") _stats_process: List[Tuple[int, "resource.struct_rusage"]] = [] # Gauges to expose monthly active user control metrics -current_mau_gauge = Gauge("synapse_admin_mau:current", "Current MAU") +current_mau_gauge = Gauge("synapse_admin_mau_current", "Current MAU") current_mau_by_service_gauge = Gauge( "synapse_admin_mau_current_mau_by_service", "Current MAU by service", ["app_service"], ) -max_mau_gauge = Gauge("synapse_admin_mau:max", "MAU Limit") +max_mau_gauge = Gauge("synapse_admin_mau_max", "MAU Limit") registered_reserved_users_mau_gauge = Gauge( - "synapse_admin_mau:registered_reserved_users", + "synapse_admin_mau_registered_reserved_users", "Registered users with reserved threepids", ) diff --git a/synapse/federation/sender/__init__.py b/synapse/federation/sender/__init__.py index 8bc60e3e3e..a6cb3ba58f 100644 --- a/synapse/federation/sender/__init__.py +++ b/synapse/federation/sender/__init__.py @@ -62,12 +62,12 @@ if TYPE_CHECKING: logger = logging.getLogger(__name__) sent_pdus_destination_dist_count = Counter( - "synapse_federation_client_sent_pdu_destinations:count", + "synapse_federation_client_sent_pdu_destinations_count", "Number of PDUs queued for sending to one or more destinations", ) sent_pdus_destination_dist_total = Counter( - "synapse_federation_client_sent_pdu_destinations:total", + "synapse_federation_client_sent_pdu_destinations", "Total number of PDUs queued for sending across all destinations", ) diff --git a/synapse/metrics/_legacy_exposition.py b/synapse/metrics/_legacy_exposition.py index 
6f00ff2a47..133f1603dd 100644 --- a/synapse/metrics/_legacy_exposition.py +++ b/synapse/metrics/_legacy_exposition.py @@ -93,6 +93,11 @@ LEGACY_METRIC_NAMES = { "synapse_util_caches_response_cache_hits": "synapse_util_caches_response_cache:hits", "synapse_util_caches_response_cache_evicted_size": "synapse_util_caches_response_cache:evicted_size", "synapse_util_caches_response_cache": "synapse_util_caches_response_cache:total", + "synapse_federation_client_sent_pdu_destinations": "synapse_federation_client_sent_pdu_destinations:total", + "synapse_federation_client_sent_pdu_destinations_count": "synapse_federation_client_sent_pdu_destinations:count", + "synapse_admin_mau_current": "synapse_admin_mau:current", + "synapse_admin_mau_max": "synapse_admin_mau:max", + "synapse_admin_mau_registered_reserved_users": "synapse_admin_mau:registered_reserved_users", } -- cgit 1.5.1 From cf11919ddd4f48b2f59062542ba62969042f80aa Mon Sep 17 00:00:00 2001 From: reivilibre Date: Thu, 8 Sep 2022 14:30:48 +0000 Subject: Fix cache metrics not being updated when not using the legacy exposition module. (#13717) --- changelog.d/13717.misc | 1 + synapse/metrics/_legacy_exposition.py | 7 ---- synapse/util/caches/__init__.py | 60 ++++++++++++++++++++++++++++------- synapse/util/metrics.py | 34 ++++++++++++++++++-- 4 files changed, 81 insertions(+), 21 deletions(-) create mode 100644 changelog.d/13717.misc (limited to 'synapse') diff --git a/changelog.d/13717.misc b/changelog.d/13717.misc new file mode 100644 index 0000000000..07ace50b12 --- /dev/null +++ b/changelog.d/13717.misc @@ -0,0 +1 @@ +Add experimental configuration option to allow disabling legacy Prometheus metric names. \ No newline at end of file diff --git a/synapse/metrics/_legacy_exposition.py b/synapse/metrics/_legacy_exposition.py index 133f1603dd..563d8cc2c6 100644 --- a/synapse/metrics/_legacy_exposition.py +++ b/synapse/metrics/_legacy_exposition.py @@ -34,8 +34,6 @@ from prometheus_client.core import Sample from twisted.web.resource import Resource from twisted.web.server import Request -from synapse.util import caches - CONTENT_TYPE_LATEST = "text/plain; version=0.0.4; charset=utf-8" @@ -107,11 +105,6 @@ def generate_latest(registry: CollectorRegistry, emit_help: bool = False) -> byt by prometheus-client. """ - # Trigger the cache metrics to be rescraped, which updates the common - # metrics but do not produce metrics themselves - for collector in caches.collectors_by_name.values(): - collector.collect() - output = [] for metric in registry.collect(): diff --git a/synapse/util/caches/__init__.py b/synapse/util/caches/__init__.py index d4a2b77c29..35c0be08b0 100644 --- a/synapse/util/caches/__init__.py +++ b/synapse/util/caches/__init__.py @@ -20,9 +20,11 @@ from sys import intern from typing import Any, Callable, Dict, List, Optional, Sized, TypeVar import attr +from prometheus_client import REGISTRY from prometheus_client.core import Gauge from synapse.config.cache import add_resizable_cache +from synapse.util.metrics import DynamicCollectorRegistry logger = logging.getLogger(__name__) @@ -30,27 +32,62 @@ logger = logging.getLogger(__name__) # Whether to track estimated memory usage of the LruCaches. TRACK_MEMORY_USAGE = False +# We track cache metrics in a special registry that lets us update the metrics +# just before they are returned from the scrape endpoint. 
+CACHE_METRIC_REGISTRY = DynamicCollectorRegistry() caches_by_name: Dict[str, Sized] = {} -collectors_by_name: Dict[str, "CacheMetric"] = {} -cache_size = Gauge("synapse_util_caches_cache_size", "", ["name"]) -cache_hits = Gauge("synapse_util_caches_cache_hits", "", ["name"]) -cache_evicted = Gauge("synapse_util_caches_cache_evicted_size", "", ["name", "reason"]) -cache_total = Gauge("synapse_util_caches_cache", "", ["name"]) -cache_max_size = Gauge("synapse_util_caches_cache_max_size", "", ["name"]) +cache_size = Gauge( + "synapse_util_caches_cache_size", "", ["name"], registry=CACHE_METRIC_REGISTRY +) +cache_hits = Gauge( + "synapse_util_caches_cache_hits", "", ["name"], registry=CACHE_METRIC_REGISTRY +) +cache_evicted = Gauge( + "synapse_util_caches_cache_evicted_size", + "", + ["name", "reason"], + registry=CACHE_METRIC_REGISTRY, +) +cache_total = Gauge( + "synapse_util_caches_cache", "", ["name"], registry=CACHE_METRIC_REGISTRY +) +cache_max_size = Gauge( + "synapse_util_caches_cache_max_size", "", ["name"], registry=CACHE_METRIC_REGISTRY +) cache_memory_usage = Gauge( "synapse_util_caches_cache_size_bytes", "Estimated memory usage of the caches", ["name"], + registry=CACHE_METRIC_REGISTRY, ) -response_cache_size = Gauge("synapse_util_caches_response_cache_size", "", ["name"]) -response_cache_hits = Gauge("synapse_util_caches_response_cache_hits", "", ["name"]) +response_cache_size = Gauge( + "synapse_util_caches_response_cache_size", + "", + ["name"], + registry=CACHE_METRIC_REGISTRY, +) +response_cache_hits = Gauge( + "synapse_util_caches_response_cache_hits", + "", + ["name"], + registry=CACHE_METRIC_REGISTRY, +) response_cache_evicted = Gauge( - "synapse_util_caches_response_cache_evicted_size", "", ["name", "reason"] + "synapse_util_caches_response_cache_evicted_size", + "", + ["name", "reason"], + registry=CACHE_METRIC_REGISTRY, ) -response_cache_total = Gauge("synapse_util_caches_response_cache", "", ["name"]) +response_cache_total = Gauge( + "synapse_util_caches_response_cache", "", ["name"], registry=CACHE_METRIC_REGISTRY +) + + +# Register our custom cache metrics registry with the global registry +REGISTRY.register(CACHE_METRIC_REGISTRY) class EvictionReason(Enum): @@ -168,9 +205,8 @@ def register_cache( add_resizable_cache(cache_name, resize_callback) metric = CacheMetric(cache, cache_type, cache_name, collect_callback) - metric_name = "cache_%s_%s" % (cache_type, cache_name) caches_by_name[cache_name] = cache - collectors_by_name[metric_name] = metric + CACHE_METRIC_REGISTRY.register_hook(metric.collect) return metric diff --git a/synapse/util/metrics.py b/synapse/util/metrics.py index bc3b4938ea..9687120ebf 100644 --- a/synapse/util/metrics.py +++ b/synapse/util/metrics.py @@ -15,9 +15,9 @@ import logging from functools import wraps from types import TracebackType -from typing import Awaitable, Callable, Optional, Type, TypeVar +from typing import Awaitable, Callable, Generator, List, Optional, Type, TypeVar -from prometheus_client import Counter +from prometheus_client import CollectorRegistry, Counter, Metric from typing_extensions import Concatenate, ParamSpec, Protocol from synapse.logging.context import ( @@ -208,3 +208,33 @@ class Measure: metrics.real_time_sum += duration # TODO: Add other in flight metrics. + + +class DynamicCollectorRegistry(CollectorRegistry): + """ + Custom Prometheus Collector registry that calls a hook first, allowing you + to update metrics on-demand. + + Don't forget to register this registry with the main registry! 
+ """ + + def __init__(self) -> None: + super().__init__() + self._pre_update_hooks: List[Callable[[], None]] = [] + + def collect(self) -> Generator[Metric, None, None]: + """ + Collects metrics, calling pre-update hooks first. + """ + + for pre_update_hook in self._pre_update_hooks: + pre_update_hook() + + yield from super().collect() + + def register_hook(self, hook: Callable[[], None]) -> None: + """ + Registers a hook that is called before metric collection. + """ + + self._pre_update_hooks.append(hook) -- cgit 1.5.1 From 8ef0c8ff14fcf613c5df1cfc30b38236de1695a7 Mon Sep 17 00:00:00 2001 From: Sean Quah <8349537+squahtx@users.noreply.github.com> Date: Thu, 8 Sep 2022 15:54:36 +0100 Subject: Fix error in `is_mine_id` when encountering a malformed ID (#13746) Previously, `is_mine_id` would raise an exception when passed an ID with no colons. Return `False` instead. Fixes #13040. Signed-off-by: Sean Quah --- changelog.d/13746.bugfix | 1 + synapse/server.py | 12 +++++++++++- tests/test_types.py | 26 +++++++++++++++++++++++++- 3 files changed, 37 insertions(+), 2 deletions(-) create mode 100644 changelog.d/13746.bugfix (limited to 'synapse') diff --git a/changelog.d/13746.bugfix b/changelog.d/13746.bugfix new file mode 100644 index 0000000000..b692af8fd5 --- /dev/null +++ b/changelog.d/13746.bugfix @@ -0,0 +1 @@ +Fix a long standing bug where Synapse would fail to handle malformed user IDs or room aliases gracefully in certain cases. diff --git a/synapse/server.py b/synapse/server.py index 5a99c0b344..df3a1cb405 100644 --- a/synapse/server.py +++ b/synapse/server.py @@ -341,7 +341,17 @@ class HomeServer(metaclass=abc.ABCMeta): return domain_specific_string.domain == self.hostname def is_mine_id(self, string: str) -> bool: - return string.split(":", 1)[1] == self.hostname + """Determines whether a user ID or room alias originates from this homeserver. + + Returns: + `True` if the hostname part of the user ID or room alias matches this + homeserver. + `False` otherwise, or if the user ID or room alias is malformed. + """ + localpart_hostname = string.split(":", 1) + if len(localpart_hostname) < 2: + return False + return localpart_hostname[1] == self.hostname @cache_in_self def get_clock(self) -> Clock: diff --git a/tests/test_types.py b/tests/test_types.py index d8d82a517e..1111169384 100644 --- a/tests/test_types.py +++ b/tests/test_types.py @@ -13,11 +13,35 @@ # limitations under the License. from synapse.api.errors import SynapseError -from synapse.types import RoomAlias, UserID, map_username_to_mxid_localpart +from synapse.types import ( + RoomAlias, + UserID, + get_domain_from_id, + get_localpart_from_id, + map_username_to_mxid_localpart, +) from tests import unittest +class IsMineIDTests(unittest.HomeserverTestCase): + def test_is_mine_id(self) -> None: + self.assertTrue(self.hs.is_mine_id("@user:test")) + self.assertTrue(self.hs.is_mine_id("#room:test")) + self.assertTrue(self.hs.is_mine_id("invalid:test")) + + self.assertFalse(self.hs.is_mine_id("@user:test\0")) + self.assertFalse(self.hs.is_mine_id("@user")) + + def test_two_colons(self) -> None: + """Test handling of IDs containing more than one colon.""" + # The domain starts after the first colon. + # These functions must interpret things consistently. 
+ self.assertFalse(self.hs.is_mine_id("@user:test:test")) + self.assertEqual("user", get_localpart_from_id("@user:test:test")) + self.assertEqual("test:test", get_domain_from_id("@user:test:test")) + + class UserIDTestCase(unittest.HomeserverTestCase): def test_parse(self): user = UserID.from_string("@1234abcd:test") -- cgit 1.5.1 From 89e8b98b6522dc9fafd5c32efe7a84dc9a31246a Mon Sep 17 00:00:00 2001 From: Sean Quah <8349537+squahtx@users.noreply.github.com> Date: Thu, 8 Sep 2022 15:55:03 +0100 Subject: Avoid raising errors due to malformed IDs in `get_current_hosts_in_room` (#13748) Handle malformed user IDs with no colons in `get_current_hosts_in_room`. It's not currently possible for a malformed user ID to join a room, so this error would never be hit. Signed-off-by: Sean Quah --- changelog.d/13748.misc | 1 + synapse/storage/databases/main/roommember.py | 5 ++++- 2 files changed, 5 insertions(+), 1 deletion(-) create mode 100644 changelog.d/13748.misc (limited to 'synapse') diff --git a/changelog.d/13748.misc b/changelog.d/13748.misc new file mode 100644 index 0000000000..2f419bb659 --- /dev/null +++ b/changelog.d/13748.misc @@ -0,0 +1 @@ +Avoid raising an error due to malformed user IDs in `get_current_hosts_in_room`. Malformed user IDs cannot currently join a room, so this error would not be hit. diff --git a/synapse/storage/databases/main/roommember.py b/synapse/storage/databases/main/roommember.py index a77e49dc66..e6b87010a9 100644 --- a/synapse/storage/databases/main/roommember.py +++ b/synapse/storage/databases/main/roommember.py @@ -1044,6 +1044,8 @@ class RoomMemberWorkerStore(EventsWorkerStore): # We use a `Set` just for fast lookups domain_set: Set[str] = set() for u in users: + if ":" not in u: + continue domain = get_domain_from_id(u) if domain not in domain_set: domain_set.add(domain) @@ -1077,7 +1079,8 @@ class RoomMemberWorkerStore(EventsWorkerStore): ORDER BY min(e.depth) ASC; """ txn.execute(sql, (room_id,)) - return [d for d, in txn] + # `server_domain` will be `NULL` for malformed MXIDs with no colons. + return [d for d, in txn if d is not None] return await self.db_pool.runInteraction( "get_current_hosts_in_room", get_current_hosts_in_room_txn -- cgit 1.5.1 From 906cead9ca5fc95fd64680ec18f775f4de6ea97f Mon Sep 17 00:00:00 2001 From: Sean Quah <8349537+squahtx@users.noreply.github.com> Date: Thu, 8 Sep 2022 15:55:29 +0100 Subject: Update docstrings to explain the impact of partial state (#13750) Update the docstrings for `get_users_in_room` and `get_current_hosts_in_room` to explain the impact of partial state. Signed-off-by: Sean Quah --- changelog.d/13750.misc | 1 + synapse/storage/databases/main/roommember.py | 17 ++++++++++++++++- 2 files changed, 17 insertions(+), 1 deletion(-) create mode 100644 changelog.d/13750.misc (limited to 'synapse') diff --git a/changelog.d/13750.misc b/changelog.d/13750.misc new file mode 100644 index 0000000000..3bccc21fc5 --- /dev/null +++ b/changelog.d/13750.misc @@ -0,0 +1 @@ +Update the docstrings for `get_users_in_room` and `get_current_hosts_in_room` to explain the impact of partial state. diff --git a/synapse/storage/databases/main/roommember.py b/synapse/storage/databases/main/roommember.py index e6b87010a9..6e1ff5626b 100644 --- a/synapse/storage/databases/main/roommember.py +++ b/synapse/storage/databases/main/roommember.py @@ -192,8 +192,15 @@ class RoomMemberWorkerStore(EventsWorkerStore): (aka. with the lowest depth). 
This is done to match the sort in `get_current_hosts_in_room()` and so we can re-use the cache but it's not horrible to have here either. - """ + Uses `m.room.member`s in the room state at the current forward extremities to + determine which users are in the room. + + Will return inaccurate results for rooms with partial state, since the state for + the forward extremities of those rooms will exclude most members. We may also + calculate room state incorrectly for such rooms and believe that a member is or + is not in the room when the opposite is true. + """ return await self.db_pool.runInteraction( "get_users_in_room", self.get_users_in_room_txn, room_id ) @@ -1022,6 +1029,14 @@ class RoomMemberWorkerStore(EventsWorkerStore): longest is good because they're most likely to have anything we ask about. + Uses `m.room.member`s in the room state at the current forward extremities to + determine which hosts are in the room. + + Will return inaccurate results for rooms with partial state, since the state for + the forward extremities of those rooms will exclude most members. We may also + calculate room state incorrectly for such rooms and believe that a host is or + is not in the room when the opposite is true. + Returns: Returns a list of servers sorted by longest in the room first. (aka. sorted by join with the lowest depth first). -- cgit 1.5.1 From f799eac7ea96f943ad1272a5a81f845dfa08a254 Mon Sep 17 00:00:00 2001 From: Dirk Klimpel <5740567+dklimpel@users.noreply.github.com> Date: Thu, 8 Sep 2022 17:41:48 +0200 Subject: Add timestamp to user's consent (#13741) Co-authored-by: reivilibre --- changelog.d/13741.feature | 1 + docs/admin_api/user_admin_api.md | 2 ++ synapse/handlers/admin.py | 1 + synapse/storage/databases/main/registration.py | 6 +++- .../main/delta/72/06add_consent_ts_to_users.sql | 16 +++++++++++ tests/rest/admin/test_user.py | 1 + tests/storage/test_registration.py | 33 +++++++++++++++++----- 7 files changed, 52 insertions(+), 8 deletions(-) create mode 100644 changelog.d/13741.feature create mode 100644 synapse/storage/schema/main/delta/72/06add_consent_ts_to_users.sql (limited to 'synapse') diff --git a/changelog.d/13741.feature b/changelog.d/13741.feature new file mode 100644 index 0000000000..dff46f373f --- /dev/null +++ b/changelog.d/13741.feature @@ -0,0 +1 @@ +Document the timestamp when a user accepts the consent, if [consent tracking](https://matrix-org.github.io/synapse/latest/consent_tracking.html) is used. \ No newline at end of file diff --git a/docs/admin_api/user_admin_api.md b/docs/admin_api/user_admin_api.md index c1ca0c8a64..975f05c929 100644 --- a/docs/admin_api/user_admin_api.md +++ b/docs/admin_api/user_admin_api.md @@ -42,6 +42,7 @@ It returns a JSON body like the following: "appservice_id": null, "consent_server_notice_sent": null, "consent_version": null, + "consent_ts": null, "external_ids": [ { "auth_provider": "", @@ -364,6 +365,7 @@ The following actions are **NOT** performed. The list may be incomplete. 
- Remove the user's creation (registration) timestamp - [Remove rate limit overrides](#override-ratelimiting-for-users) - Remove from monthly active users +- Remove user's consent information (consent version and timestamp) ## Reset password diff --git a/synapse/handlers/admin.py b/synapse/handlers/admin.py index d4fe7df533..cf9f19608a 100644 --- a/synapse/handlers/admin.py +++ b/synapse/handlers/admin.py @@ -70,6 +70,7 @@ class AdminHandler: "appservice_id", "consent_server_notice_sent", "consent_version", + "consent_ts", "user_type", "is_guest", } diff --git a/synapse/storage/databases/main/registration.py b/synapse/storage/databases/main/registration.py index 7fb9c801da..ac821878b0 100644 --- a/synapse/storage/databases/main/registration.py +++ b/synapse/storage/databases/main/registration.py @@ -175,6 +175,7 @@ class RegistrationWorkerStore(CacheInvalidationWorkerStore): "is_guest", "admin", "consent_version", + "consent_ts", "consent_server_notice_sent", "appservice_id", "creation_ts", @@ -2227,7 +2228,10 @@ class RegistrationStore(StatsStore, RegistrationBackgroundUpdateStore): txn, table="users", keyvalues={"name": user_id}, - updatevalues={"consent_version": consent_version}, + updatevalues={ + "consent_version": consent_version, + "consent_ts": self._clock.time_msec(), + }, ) self._invalidate_cache_and_stream(txn, self.get_user_by_id, (user_id,)) diff --git a/synapse/storage/schema/main/delta/72/06add_consent_ts_to_users.sql b/synapse/storage/schema/main/delta/72/06add_consent_ts_to_users.sql new file mode 100644 index 0000000000..609eb1750f --- /dev/null +++ b/synapse/storage/schema/main/delta/72/06add_consent_ts_to_users.sql @@ -0,0 +1,16 @@ +/* Copyright 2022 The Matrix.org Foundation C.I.C + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +ALTER TABLE users ADD consent_ts bigint; diff --git a/tests/rest/admin/test_user.py b/tests/rest/admin/test_user.py index 1afd082707..ec5ccf6fca 100644 --- a/tests/rest/admin/test_user.py +++ b/tests/rest/admin/test_user.py @@ -2580,6 +2580,7 @@ class UserRestTestCase(unittest.HomeserverTestCase): self.assertIn("appservice_id", content) self.assertIn("consent_server_notice_sent", content) self.assertIn("consent_version", content) + self.assertIn("consent_ts", content) self.assertIn("external_ids", content) # This key was removed intentionally. Ensure it is not accidentally re-included. diff --git a/tests/storage/test_registration.py b/tests/storage/test_registration.py index a49ac1525e..853a93afab 100644 --- a/tests/storage/test_registration.py +++ b/tests/storage/test_registration.py @@ -11,15 +11,18 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
+from twisted.test.proto_helpers import MemoryReactor from synapse.api.constants import UserTypes from synapse.api.errors import ThreepidValidationError +from synapse.server import HomeServer +from synapse.util import Clock from tests.unittest import HomeserverTestCase class RegistrationStoreTestCase(HomeserverTestCase): - def prepare(self, reactor, clock, hs): + def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None: self.store = hs.get_datastores().main self.user_id = "@my-user:test" @@ -27,7 +30,7 @@ class RegistrationStoreTestCase(HomeserverTestCase): self.pwhash = "{xx1}123456789" self.device_id = "akgjhdjklgshg" - def test_register(self): + def test_register(self) -> None: self.get_success(self.store.register_user(self.user_id, self.pwhash)) self.assertEqual( @@ -38,6 +41,7 @@ class RegistrationStoreTestCase(HomeserverTestCase): "admin": 0, "is_guest": 0, "consent_version": None, + "consent_ts": None, "consent_server_notice_sent": None, "appservice_id": None, "creation_ts": 0, @@ -48,7 +52,20 @@ class RegistrationStoreTestCase(HomeserverTestCase): (self.get_success(self.store.get_user_by_id(self.user_id))), ) - def test_add_tokens(self): + def test_consent(self) -> None: + self.get_success(self.store.register_user(self.user_id, self.pwhash)) + before_consent = self.clock.time_msec() + self.reactor.advance(5) + self.get_success(self.store.user_set_consent_version(self.user_id, "1")) + self.reactor.advance(5) + + user = self.get_success(self.store.get_user_by_id(self.user_id)) + assert user + self.assertEqual(user["consent_version"], "1") + self.assertGreater(user["consent_ts"], before_consent) + self.assertLess(user["consent_ts"], self.clock.time_msec()) + + def test_add_tokens(self) -> None: self.get_success(self.store.register_user(self.user_id, self.pwhash)) self.get_success( self.store.add_access_token_to_user( @@ -58,11 +75,12 @@ class RegistrationStoreTestCase(HomeserverTestCase): result = self.get_success(self.store.get_user_by_access_token(self.tokens[1])) + assert result self.assertEqual(result.user_id, self.user_id) self.assertEqual(result.device_id, self.device_id) self.assertIsNotNone(result.token_id) - def test_user_delete_access_tokens(self): + def test_user_delete_access_tokens(self) -> None: # add some tokens self.get_success(self.store.register_user(self.user_id, self.pwhash)) self.get_success( @@ -87,6 +105,7 @@ class RegistrationStoreTestCase(HomeserverTestCase): # check the one not associated with the device was not deleted user = self.get_success(self.store.get_user_by_access_token(self.tokens[0])) + assert user self.assertEqual(self.user_id, user.user_id) # now delete the rest @@ -95,11 +114,11 @@ class RegistrationStoreTestCase(HomeserverTestCase): user = self.get_success(self.store.get_user_by_access_token(self.tokens[0])) self.assertIsNone(user, "access token was not deleted without device_id") - def test_is_support_user(self): + def test_is_support_user(self) -> None: TEST_USER = "@test:test" SUPPORT_USER = "@support:test" - res = self.get_success(self.store.is_support_user(None)) + res = self.get_success(self.store.is_support_user(None)) # type: ignore[arg-type] self.assertFalse(res) self.get_success( self.store.register_user(user_id=TEST_USER, password_hash=None) @@ -115,7 +134,7 @@ class RegistrationStoreTestCase(HomeserverTestCase): res = self.get_success(self.store.is_support_user(SUPPORT_USER)) self.assertTrue(res) - def test_3pid_inhibit_invalid_validation_session_error(self): + def 
test_3pid_inhibit_invalid_validation_session_error(self) -> None: """Tests that enabling the configuration option to inhibit 3PID errors on /requestToken also inhibits validation errors caused by an unknown session ID. """ -- cgit 1.5.1 From 69fa29700e260f55238bfa480d3dd2b91a4353c0 Mon Sep 17 00:00:00 2001 From: Sean Quah <8349537+squahtx@users.noreply.github.com> Date: Thu, 8 Sep 2022 20:13:39 +0100 Subject: Re-type hint some collections in `/sync` code as read-only (#13754) Signed-off-by: Sean Quah --- changelog.d/13754.misc | 1 + synapse/handlers/sync.py | 20 ++++++++++---------- 2 files changed, 11 insertions(+), 10 deletions(-) create mode 100644 changelog.d/13754.misc (limited to 'synapse') diff --git a/changelog.d/13754.misc b/changelog.d/13754.misc new file mode 100644 index 0000000000..662ee00e99 --- /dev/null +++ b/changelog.d/13754.misc @@ -0,0 +1 @@ +Re-type hint some collections as read-only. diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index 2d95b1fa24..5293fa4d0e 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -15,6 +15,7 @@ import itertools import logging from typing import ( TYPE_CHECKING, + AbstractSet, Any, Collection, Dict, @@ -1413,10 +1414,10 @@ class SyncHandler: async def _generate_sync_entry_for_device_list( self, sync_result_builder: "SyncResultBuilder", - newly_joined_rooms: Set[str], - newly_joined_or_invited_or_knocked_users: Set[str], - newly_left_rooms: Set[str], - newly_left_users: Set[str], + newly_joined_rooms: AbstractSet[str], + newly_joined_or_invited_or_knocked_users: AbstractSet[str], + newly_left_rooms: AbstractSet[str], + newly_left_users: AbstractSet[str], ) -> DeviceListUpdates: """Generate the DeviceListUpdates section of sync @@ -1434,8 +1435,7 @@ class SyncHandler: user_id = sync_result_builder.sync_config.user.to_string() since_token = sync_result_builder.since_token - # We're going to mutate these fields, so lets copy them rather than - # assume they won't get used later. + # Take a copy since these fields will be mutated later. newly_joined_or_invited_or_knocked_users = set( newly_joined_or_invited_or_knocked_users ) @@ -1635,8 +1635,8 @@ class SyncHandler: async def _generate_sync_entry_for_presence( self, sync_result_builder: "SyncResultBuilder", - newly_joined_rooms: Set[str], - newly_joined_or_invited_users: Set[str], + newly_joined_rooms: AbstractSet[str], + newly_joined_or_invited_users: AbstractSet[str], ) -> None: """Generates the presence portion of the sync response. Populates the `sync_result_builder` with the result. @@ -1694,7 +1694,7 @@ class SyncHandler: self, sync_result_builder: "SyncResultBuilder", account_data_by_room: Dict[str, Dict[str, JsonDict]], - ) -> Tuple[Set[str], Set[str], Set[str], Set[str]]: + ) -> Tuple[AbstractSet[str], AbstractSet[str], AbstractSet[str], AbstractSet[str]]: """Generates the rooms portion of the sync response. Populates the `sync_result_builder` with the result. @@ -2534,7 +2534,7 @@ class SyncResultBuilder: archived: List[ArchivedSyncResult] = attr.Factory(list) to_device: List[JsonDict] = attr.Factory(list) - def calculate_user_changes(self) -> Tuple[Set[str], Set[str]]: + def calculate_user_changes(self) -> Tuple[AbstractSet[str], AbstractSet[str]]: """Work out which other users have joined or left rooms we are joined to. This data only is only useful for an incremental sync. 
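The switch from `Set` to `AbstractSet` in the hunks above makes the read-only intent explicit: mypy will flag any attempt to mutate the argument, while callers remain free to pass either a `set` or a `frozenset`. A small self-contained illustration (a hypothetical helper, not code from this patch):

```python
from typing import AbstractSet, Set


def newly_left_users(before: AbstractSet[str], after: AbstractSet[str]) -> Set[str]:
    # `AbstractSet` exposes only read-only operations, so mypy rejects
    # accidental mutation such as `before.add(...)` inside this function.
    return set(before) - set(after)


# Callers can pass any set-like collection, including an immutable frozenset:
newly_left_users(frozenset({"@a:example.org"}), {"@b:example.org"})
```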
-- cgit 1.5.1 From f2d2481e56f06005de5ae8429eca3bb31834079e Mon Sep 17 00:00:00 2001 From: David Robertson Date: Fri, 9 Sep 2022 11:14:10 +0100 Subject: Require SQLite >= 3.27.0 (#13760) --- changelog.d/13760.removal | 1 + synapse/storage/database.py | 47 +++++----- synapse/storage/databases/main/lock.py | 121 ++++++++----------------- synapse/storage/databases/main/stats.py | 86 +++++++----------- synapse/storage/databases/main/transactions.py | 30 ++---- synapse/storage/engines/_base.py | 8 -- synapse/storage/engines/postgres.py | 7 -- synapse/storage/engines/sqlite.py | 13 +-- tests/storage/test_base.py | 1 - 9 files changed, 106 insertions(+), 208 deletions(-) create mode 100644 changelog.d/13760.removal (limited to 'synapse') diff --git a/changelog.d/13760.removal b/changelog.d/13760.removal new file mode 100644 index 0000000000..624e7c3678 --- /dev/null +++ b/changelog.d/13760.removal @@ -0,0 +1 @@ +Synapse will now refuse to start if configured to use SQLite < 3.27. diff --git a/synapse/storage/database.py b/synapse/storage/database.py index b394a6658b..e881bff7fb 100644 --- a/synapse/storage/database.py +++ b/synapse/storage/database.py @@ -533,15 +533,14 @@ class DatabasePool: if isinstance(self.engine, Sqlite3Engine): self._unsafe_to_upsert_tables.add("user_directory_search") - if self.engine.can_native_upsert: - # Check ASAP (and then later, every 1s) to see if we have finished - # background updates of tables that aren't safe to update. - self._clock.call_later( - 0.0, - run_as_background_process, - "upsert_safety_check", - self._check_safe_to_upsert, - ) + # Check ASAP (and then later, every 1s) to see if we have finished + # background updates of tables that aren't safe to update. + self._clock.call_later( + 0.0, + run_as_background_process, + "upsert_safety_check", + self._check_safe_to_upsert, + ) def name(self) -> str: "Return the name of this database" @@ -1160,11 +1159,8 @@ class DatabasePool: attempts = 0 while True: try: - # We can autocommit if we are going to use native upserts - autocommit = ( - self.engine.can_native_upsert - and table not in self._unsafe_to_upsert_tables - ) + # We can autocommit if it is safe to upsert + autocommit = table not in self._unsafe_to_upsert_tables return await self.runInteraction( desc, @@ -1199,7 +1195,7 @@ class DatabasePool: ) -> bool: """ Pick the UPSERT method which works best on the platform. Either the - native one (Pg9.5+, recent SQLites), or fall back to an emulated method. + native one (Pg9.5+, SQLite >= 3.24), or fall back to an emulated method. Args: txn: The transaction to use. @@ -1207,14 +1203,15 @@ class DatabasePool: keyvalues: The unique key tables and their new values values: The nonunique columns and their new values insertion_values: additional key/values to use only when inserting - lock: True to lock the table when doing the upsert. + lock: True to lock the table when doing the upsert. Unused when performing + a native upsert. Returns: Returns True if a row was inserted or updated (i.e. if `values` is not empty then this always returns True) """ insertion_values = insertion_values or {} - if self.engine.can_native_upsert and table not in self._unsafe_to_upsert_tables: + if table not in self._unsafe_to_upsert_tables: return self.simple_upsert_txn_native_upsert( txn, table, keyvalues, values, insertion_values=insertion_values ) @@ -1365,14 +1362,12 @@ class DatabasePool: value_names: The value column names value_values: A list of each row's value column values. Ignored if value_names is empty. 
- lock: True to lock the table when doing the upsert. Unused if the database engine - supports native upserts. + lock: True to lock the table when doing the upsert. Unused when performing + a native upsert. """ - # We can autocommit if we are going to use native upserts - autocommit = ( - self.engine.can_native_upsert and table not in self._unsafe_to_upsert_tables - ) + # We can autocommit if it safe to upsert + autocommit = table not in self._unsafe_to_upsert_tables await self.runInteraction( desc, @@ -1406,10 +1401,10 @@ class DatabasePool: value_names: The value column names value_values: A list of each row's value column values. Ignored if value_names is empty. - lock: True to lock the table when doing the upsert. Unused if the database engine - supports native upserts. + lock: True to lock the table when doing the upsert. Unused when performing + a native upsert. """ - if self.engine.can_native_upsert and table not in self._unsafe_to_upsert_tables: + if table not in self._unsafe_to_upsert_tables: return self.simple_upsert_many_txn_native_upsert( txn, table, key_names, key_values, value_names, value_values ) diff --git a/synapse/storage/databases/main/lock.py b/synapse/storage/databases/main/lock.py index 2d7633fbd5..7270ef09da 100644 --- a/synapse/storage/databases/main/lock.py +++ b/synapse/storage/databases/main/lock.py @@ -129,91 +129,48 @@ class LockStore(SQLBaseStore): now = self._clock.time_msec() token = random_string(6) - if self.db_pool.engine.can_native_upsert: - - def _try_acquire_lock_txn(txn: LoggingTransaction) -> bool: - # We take out the lock if either a) there is no row for the lock - # already, b) the existing row has timed out, or c) the row is - # for this instance (which means the process got killed and - # restarted) - sql = """ - INSERT INTO worker_locks (lock_name, lock_key, instance_name, token, last_renewed_ts) - VALUES (?, ?, ?, ?, ?) - ON CONFLICT (lock_name, lock_key) - DO UPDATE - SET - token = EXCLUDED.token, - instance_name = EXCLUDED.instance_name, - last_renewed_ts = EXCLUDED.last_renewed_ts - WHERE - worker_locks.last_renewed_ts < ? - OR worker_locks.instance_name = EXCLUDED.instance_name - """ - txn.execute( - sql, - ( - lock_name, - lock_key, - self._instance_name, - token, - now, - now - _LOCK_TIMEOUT_MS, - ), - ) - - # We only acquired the lock if we inserted or updated the table. - return bool(txn.rowcount) - - did_lock = await self.db_pool.runInteraction( - "try_acquire_lock", - _try_acquire_lock_txn, - # We can autocommit here as we're executing a single query, this - # will avoid serialization errors. - db_autocommit=True, + def _try_acquire_lock_txn(txn: LoggingTransaction) -> bool: + # We take out the lock if either a) there is no row for the lock + # already, b) the existing row has timed out, or c) the row is + # for this instance (which means the process got killed and + # restarted) + sql = """ + INSERT INTO worker_locks (lock_name, lock_key, instance_name, token, last_renewed_ts) + VALUES (?, ?, ?, ?, ?) + ON CONFLICT (lock_name, lock_key) + DO UPDATE + SET + token = EXCLUDED.token, + instance_name = EXCLUDED.instance_name, + last_renewed_ts = EXCLUDED.last_renewed_ts + WHERE + worker_locks.last_renewed_ts < ? 
+ OR worker_locks.instance_name = EXCLUDED.instance_name + """ + txn.execute( + sql, + ( + lock_name, + lock_key, + self._instance_name, + token, + now, + now - _LOCK_TIMEOUT_MS, + ), ) - if not did_lock: - return None - - else: - # If we're on an old SQLite we emulate the above logic by first - # clearing out any existing stale locks and then upserting. - - def _try_acquire_lock_emulated_txn(txn: LoggingTransaction) -> bool: - sql = """ - DELETE FROM worker_locks - WHERE - lock_name = ? - AND lock_key = ? - AND (last_renewed_ts < ? OR instance_name = ?) - """ - txn.execute( - sql, - (lock_name, lock_key, now - _LOCK_TIMEOUT_MS, self._instance_name), - ) - - inserted = self.db_pool.simple_upsert_txn_emulated( - txn, - table="worker_locks", - keyvalues={ - "lock_name": lock_name, - "lock_key": lock_key, - }, - values={}, - insertion_values={ - "token": token, - "last_renewed_ts": self._clock.time_msec(), - "instance_name": self._instance_name, - }, - ) - - return inserted - did_lock = await self.db_pool.runInteraction( - "try_acquire_lock_emulated", _try_acquire_lock_emulated_txn - ) + # We only acquired the lock if we inserted or updated the table. + return bool(txn.rowcount) - if not did_lock: - return None + did_lock = await self.db_pool.runInteraction( + "try_acquire_lock", + _try_acquire_lock_txn, + # We can autocommit here as we're executing a single query, this + # will avoid serialization errors. + db_autocommit=True, + ) + if not did_lock: + return None lock = Lock( self._reactor, diff --git a/synapse/storage/databases/main/stats.py b/synapse/storage/databases/main/stats.py index b4c652acf3..356d4ca788 100644 --- a/synapse/storage/databases/main/stats.py +++ b/synapse/storage/databases/main/stats.py @@ -446,59 +446,41 @@ class StatsStore(StateDeltasStore): absolutes: Absolute (set) fields additive_relatives: Fields that will be added onto if existing row present. 
""" - if self.database_engine.can_native_upsert: - absolute_updates = [ - "%(field)s = EXCLUDED.%(field)s" % {"field": field} - for field in absolutes.keys() - ] - - relative_updates = [ - "%(field)s = EXCLUDED.%(field)s + COALESCE(%(table)s.%(field)s, 0)" - % {"table": table, "field": field} - for field in additive_relatives.keys() - ] - - insert_cols = [] - qargs = [] - - for (key, val) in chain( - keyvalues.items(), absolutes.items(), additive_relatives.items() - ): - insert_cols.append(key) - qargs.append(val) + absolute_updates = [ + "%(field)s = EXCLUDED.%(field)s" % {"field": field} + for field in absolutes.keys() + ] + + relative_updates = [ + "%(field)s = EXCLUDED.%(field)s + COALESCE(%(table)s.%(field)s, 0)" + % {"table": table, "field": field} + for field in additive_relatives.keys() + ] + + insert_cols = [] + qargs = [] + + for (key, val) in chain( + keyvalues.items(), absolutes.items(), additive_relatives.items() + ): + insert_cols.append(key) + qargs.append(val) + + sql = """ + INSERT INTO %(table)s (%(insert_cols_cs)s) + VALUES (%(insert_vals_qs)s) + ON CONFLICT (%(key_columns)s) DO UPDATE SET %(updates)s + """ % { + "table": table, + "insert_cols_cs": ", ".join(insert_cols), + "insert_vals_qs": ", ".join( + ["?"] * (len(keyvalues) + len(absolutes) + len(additive_relatives)) + ), + "key_columns": ", ".join(keyvalues), + "updates": ", ".join(chain(absolute_updates, relative_updates)), + } - sql = """ - INSERT INTO %(table)s (%(insert_cols_cs)s) - VALUES (%(insert_vals_qs)s) - ON CONFLICT (%(key_columns)s) DO UPDATE SET %(updates)s - """ % { - "table": table, - "insert_cols_cs": ", ".join(insert_cols), - "insert_vals_qs": ", ".join( - ["?"] * (len(keyvalues) + len(absolutes) + len(additive_relatives)) - ), - "key_columns": ", ".join(keyvalues), - "updates": ", ".join(chain(absolute_updates, relative_updates)), - } - - txn.execute(sql, qargs) - else: - self.database_engine.lock_table(txn, table) - retcols = list(chain(absolutes.keys(), additive_relatives.keys())) - current_row = self.db_pool.simple_select_one_txn( - txn, table, keyvalues, retcols, allow_none=True - ) - if current_row is None: - merged_dict = {**keyvalues, **absolutes, **additive_relatives} - self.db_pool.simple_insert_txn(txn, table, merged_dict) - else: - for (key, val) in additive_relatives.items(): - if current_row[key] is None: - current_row[key] = val - else: - current_row[key] += val - current_row.update(absolutes) - self.db_pool.simple_update_one_txn(txn, table, keyvalues, current_row) + txn.execute(sql, qargs) async def _calculate_and_set_initial_state_for_room(self, room_id: str) -> None: """Calculate and insert an entry into room_stats_current. 
diff --git a/synapse/storage/databases/main/transactions.py b/synapse/storage/databases/main/transactions.py index ba79e19f7f..f8c6877ee8 100644 --- a/synapse/storage/databases/main/transactions.py +++ b/synapse/storage/databases/main/transactions.py @@ -221,25 +221,15 @@ class TransactionWorkerStore(CacheInvalidationWorkerStore): retry_interval: how long until next retry in ms """ - if self.database_engine.can_native_upsert: - await self.db_pool.runInteraction( - "set_destination_retry_timings", - self._set_destination_retry_timings_native, - destination, - failure_ts, - retry_last_ts, - retry_interval, - db_autocommit=True, # Safe as its a single upsert - ) - else: - await self.db_pool.runInteraction( - "set_destination_retry_timings", - self._set_destination_retry_timings_emulated, - destination, - failure_ts, - retry_last_ts, - retry_interval, - ) + await self.db_pool.runInteraction( + "set_destination_retry_timings", + self._set_destination_retry_timings_native, + destination, + failure_ts, + retry_last_ts, + retry_interval, + db_autocommit=True, # Safe as it's a single upsert + ) def _set_destination_retry_timings_native( self, @@ -249,8 +239,6 @@ class TransactionWorkerStore(CacheInvalidationWorkerStore): retry_last_ts: int, retry_interval: int, ) -> None: - assert self.database_engine.can_native_upsert - # Upsert retry time interval if retry_interval is zero (i.e. we're # resetting it) or greater than the existing retry interval. # diff --git a/synapse/storage/engines/_base.py b/synapse/storage/engines/_base.py index 971ff82693..0d16a419a4 100644 --- a/synapse/storage/engines/_base.py +++ b/synapse/storage/engines/_base.py @@ -43,14 +43,6 @@ class BaseDatabaseEngine(Generic[ConnectionType], metaclass=abc.ABCMeta): def single_threaded(self) -> bool: ... - @property - @abc.abstractmethod - def can_native_upsert(self) -> bool: - """ - Do we support native UPSERTs? - """ - ... - @property @abc.abstractmethod def supports_using_any_list(self) -> bool: diff --git a/synapse/storage/engines/postgres.py b/synapse/storage/engines/postgres.py index 517f9d5f98..7f7d006ac2 100644 --- a/synapse/storage/engines/postgres.py +++ b/synapse/storage/engines/postgres.py @@ -158,13 +158,6 @@ class PostgresEngine(BaseDatabaseEngine[psycopg2.extensions.connection]): cursor.close() db_conn.commit() - @property - def can_native_upsert(self) -> bool: - """ - Can we use native UPSERTs? - """ - return True - @property def supports_using_any_list(self) -> bool: """Do we support using `a = ANY(?)` and passing a list""" diff --git a/synapse/storage/engines/sqlite.py b/synapse/storage/engines/sqlite.py index 621f2c5efe..095ae0a096 100644 --- a/synapse/storage/engines/sqlite.py +++ b/synapse/storage/engines/sqlite.py @@ -48,14 +48,6 @@ class Sqlite3Engine(BaseDatabaseEngine[sqlite3.Connection]): def single_threaded(self) -> bool: return True - @property - def can_native_upsert(self) -> bool: - """ - Do we support native UPSERTs? This requires SQLite3 3.24+, plus some - more work we haven't done yet to tell what was inserted vs updated. 
- """ - return sqlite3.sqlite_version_info >= (3, 24, 0) - @property def supports_using_any_list(self) -> bool: """Do we support using `a = ANY(?)` and passing a list""" @@ -70,12 +62,11 @@ class Sqlite3Engine(BaseDatabaseEngine[sqlite3.Connection]): self, db_conn: sqlite3.Connection, allow_outdated_version: bool = False ) -> None: if not allow_outdated_version: - version = sqlite3.sqlite_version_info # Synapse is untested against older SQLite versions, and we don't want # to let users upgrade to a version of Synapse with broken support for their # sqlite version, because it risks leaving them with a half-upgraded db. - if version < (3, 22, 0): - raise RuntimeError("Synapse requires sqlite 3.22 or above.") + if sqlite3.sqlite_version_info < (3, 27, 0): + raise RuntimeError("Synapse requires sqlite 3.27 or above.") def check_new_database(self, txn: Cursor) -> None: """Gets called when setting up a brand new database. This allows us to diff --git a/tests/storage/test_base.py b/tests/storage/test_base.py index cce8e75c74..40e58f8199 100644 --- a/tests/storage/test_base.py +++ b/tests/storage/test_base.py @@ -54,7 +54,6 @@ class SQLBaseStoreTestCase(unittest.TestCase): sqlite_config = {"name": "sqlite3"} engine = create_engine(sqlite_config) fake_engine = Mock(wraps=engine) - fake_engine.can_native_upsert = False fake_engine.in_transaction.return_value = False db = DatabasePool(Mock(), Mock(config=sqlite_config), fake_engine) -- cgit 1.5.1 From 3d9f82efcb9c337197c9f50a88ec3fb541ee08ff Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Fri, 9 Sep 2022 07:08:41 -0400 Subject: Use an upsert for `receipts_graph`. (#13752) Instead of a delete, then insert. This was previously done for `receipts_linearized` in 2dc430d36ef793b38d6d79ec8db4ea60588df2ee (#7607). --- changelog.d/13752.misc | 1 + synapse/storage/databases/main/receipts.py | 12 ++++-------- 2 files changed, 5 insertions(+), 8 deletions(-) create mode 100644 changelog.d/13752.misc (limited to 'synapse') diff --git a/changelog.d/13752.misc b/changelog.d/13752.misc new file mode 100644 index 0000000000..7624861b9f --- /dev/null +++ b/changelog.d/13752.misc @@ -0,0 +1 @@ +User an additional database query when persisting receipts. 
diff --git a/synapse/storage/databases/main/receipts.py b/synapse/storage/databases/main/receipts.py index 124c70ad37..3838409519 100644 --- a/synapse/storage/databases/main/receipts.py +++ b/synapse/storage/databases/main/receipts.py @@ -812,7 +812,7 @@ class ReceiptsWorkerStore(SQLBaseStore): # FIXME: This shouldn't invalidate the whole cache txn.call_after(self._get_linearized_receipts_for_room.invalidate, (room_id,)) - self.db_pool.simple_delete_txn( + self.db_pool.simple_upsert_txn( txn, table="receipts_graph", keyvalues={ @@ -820,17 +820,13 @@ class ReceiptsWorkerStore(SQLBaseStore): "receipt_type": receipt_type, "user_id": user_id, }, - ) - self.db_pool.simple_insert_txn( - txn, - table="receipts_graph", values={ - "room_id": room_id, - "receipt_type": receipt_type, - "user_id": user_id, "event_ids": json_encoder.encode(event_ids), "data": json_encoder.encode(data), }, + # receipts_graph has a unique constraint on + # (user_id, room_id, receipt_type), so no need to lock + lock=False, ) -- cgit 1.5.1 From f694bb71b7ea7841a5b5db3d884dfda5a3f78023 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Fri, 9 Sep 2022 11:30:06 -0500 Subject: Strip number suffix from instance name to consolidate services that traces are spread over (#13729) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The problem with many services is that it makes it hard to find which service has the trace you want, see https://github.com/jaegertracing/jaeger-ui/issues/985 Previously, we split traces out into services based on their instance name like `matrix.org client_reader-1`, etc but there are many worker instances of the same `client_reader` so there is a lot to click through. With this PR, all of the traces are just collected under the worker type like `client_reader`, `event_persister` 😇 Note: A Synapse worker instance name is an opaque string with the number convention only being our own thing for the `matrix.org` deployment. But seems pretty sensible to group things this way. --- changelog.d/13729.misc | 1 + synapse/logging/opentracing.py | 13 ++++++++++++- 2 files changed, 13 insertions(+), 1 deletion(-) create mode 100644 changelog.d/13729.misc (limited to 'synapse') diff --git a/changelog.d/13729.misc b/changelog.d/13729.misc new file mode 100644 index 0000000000..c6a6f617e3 --- /dev/null +++ b/changelog.d/13729.misc @@ -0,0 +1 @@ +Strip number suffix from instance name to consolidate services that traces are spread over. diff --git a/synapse/logging/opentracing.py b/synapse/logging/opentracing.py index 482316a1ff..adf3f54770 100644 --- a/synapse/logging/opentracing.py +++ b/synapse/logging/opentracing.py @@ -203,6 +203,9 @@ if TYPE_CHECKING: # Helper class +# Matches the number suffix in an instance name like "matrix.org client_reader-8" +STRIP_INSTANCE_NUMBER_SUFFIX_REGEX = re.compile(r"[_-]?\d+$") + class _DummyTagNames: """wrapper of opentracings tags. We need to have them if we @@ -441,9 +444,17 @@ def init_tracer(hs: "HomeServer") -> None: from jaeger_client.metrics.prometheus import PrometheusMetricsFactory + # Instance names are opaque strings but by stripping off the number suffix, + # we can get something that looks like a "worker type", e.g. + # "client_reader-1" -> "client_reader" so we don't spread the traces across + # so many services. 
+ instance_name_by_type = re.sub( + STRIP_INSTANCE_NUMBER_SUFFIX_REGEX, "", hs.get_instance_name() + ) + config = JaegerConfig( config=hs.config.tracing.jaeger_config, - service_name=f"{hs.config.server.server_name} {hs.get_instance_name()}", + service_name=f"{hs.config.server.server_name} {instance_name_by_type}", scope_manager=LogContextScopeManager(), metrics_factory=PrometheusMetricsFactory(), ) -- cgit 1.5.1 From a911ffb42cc88adc8084a04acf6fd651efba278f Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Fri, 9 Sep 2022 11:31:37 -0500 Subject: Tag trace with instance name (#13761) We tag the Synapse instance name so that it's an easy jumping off point into the logs. Can also be used to filter for an instance that is under load. As suggested by @clokep and @reivilibre in, - https://github.com/matrix-org/synapse/pull/13729#discussion_r964719258 - https://github.com/matrix-org/synapse/pull/13729#discussion_r964733578 --- changelog.d/13761.misc | 1 + synapse/api/auth.py | 7 +++++++ synapse/logging/opentracing.py | 6 ++++-- 3 files changed, 12 insertions(+), 2 deletions(-) create mode 100644 changelog.d/13761.misc (limited to 'synapse') diff --git a/changelog.d/13761.misc b/changelog.d/13761.misc new file mode 100644 index 0000000000..f7aa8c459a --- /dev/null +++ b/changelog.d/13761.misc @@ -0,0 +1 @@ +Tag traces with the instance name to be able to easily jump into the right logs and filter traces by instance. diff --git a/synapse/api/auth.py b/synapse/api/auth.py index 8e54ef84b2..4a75eb6b21 100644 --- a/synapse/api/auth.py +++ b/synapse/api/auth.py @@ -32,6 +32,7 @@ from synapse.appservice import ApplicationService from synapse.http import get_request_user_agent from synapse.http.site import SynapseRequest from synapse.logging.opentracing import ( + SynapseTags, active_span, force_tracing, start_active_span, @@ -161,6 +162,12 @@ class Auth: parent_span.set_tag( "authenticated_entity", requester.authenticated_entity ) + # We tag the Synapse instance name so that it's an easy jumping + # off point into the logs. Can also be used to filter for an + # instance that is under load. + parent_span.set_tag( + SynapseTags.INSTANCE_NAME, self.hs.get_instance_name() + ) parent_span.set_tag("user_id", requester.user.to_string()) if requester.device_id is not None: parent_span.set_tag("device_id", requester.device_id) diff --git a/synapse/logging/opentracing.py b/synapse/logging/opentracing.py index adf3f54770..ca2735dd6d 100644 --- a/synapse/logging/opentracing.py +++ b/synapse/logging/opentracing.py @@ -298,6 +298,8 @@ class SynapseTags: # Whether the sync response has new data to be returned to the client. SYNC_RESULT = "sync.new_data" + INSTANCE_NAME = "instance_name" + # incoming HTTP request ID (as written in the logs) REQUEST_ID = "request_id" @@ -1043,11 +1045,11 @@ def trace_servlet( # with JsonResource). 
scope.span.set_operation_name(request.request_metrics.name) - # set the tags *after* the servlet completes, in case it decided to - # prioritise the span (tags will get dropped on unprioritised spans) request_tags[ SynapseTags.REQUEST_TAG ] = request.request_metrics.start_context.tag + # set the tags *after* the servlet completes, in case it decided to + # prioritise the span (tags will get dropped on unprioritised spans) for k, v in request_tags.items(): scope.span.set_tag(k, v) -- cgit 1.5.1 From 4c4889cac0e6f7df4689287b9fddea1bf8b15b7f Mon Sep 17 00:00:00 2001 From: Nick Mills-Barrett Date: Fri, 9 Sep 2022 19:00:21 +0100 Subject: Concurrently collect room unread counts for push badges (#13765) Most of the time this function is heavily cached, but when that isn't the case fetching the counts room by room slows down push delivery on users with many (thousands) of rooms. Signed off by Nick @ Beeper. --- changelog.d/13765.misc | 1 + synapse/push/push_tools.py | 13 ++++++++++--- 2 files changed, 11 insertions(+), 3 deletions(-) create mode 100644 changelog.d/13765.misc (limited to 'synapse') diff --git a/changelog.d/13765.misc b/changelog.d/13765.misc new file mode 100644 index 0000000000..fdda5cf3b6 --- /dev/null +++ b/changelog.d/13765.misc @@ -0,0 +1 @@ +Concurrently fetch room push actions when calculating badge counts. Contributed by Nick @ Beeper (@fizzadar). diff --git a/synapse/push/push_tools.py b/synapse/push/push_tools.py index 6661887d9f..658bf373b7 100644 --- a/synapse/push/push_tools.py +++ b/synapse/push/push_tools.py @@ -17,6 +17,7 @@ from synapse.events import EventBase from synapse.push.presentable_names import calculate_room_name, name_from_member_event from synapse.storage.controllers import StorageControllers from synapse.storage.databases.main import DataStore +from synapse.util.async_helpers import concurrently_execute async def get_badge_count(store: DataStore, user_id: str, group_by_room: bool) -> int: @@ -25,13 +26,19 @@ async def get_badge_count(store: DataStore, user_id: str, group_by_room: bool) - badge = len(invites) - for room_id in joins: - notifs = await ( - store.get_unread_event_push_actions_by_room_for_user( + room_notifs = [] + + async def get_room_unread_count(room_id: str) -> None: + room_notifs.append( + await store.get_unread_event_push_actions_by_room_for_user( room_id, user_id, ) ) + + await concurrently_execute(get_room_unread_count, joins, 10) + + for notifs in room_notifs: if notifs.notify_count == 0: continue -- cgit 1.5.1 From ebfeac7c5ded851a2639911ec6adf9d0fcdb029a Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 12 Sep 2022 11:03:42 +0100 Subject: Check if Rust lib needs rebuilding. (#13759) This protects against the common mistake of failing to remember to rebuild Rust code after making changes. --- changelog.d/13759.misc | 1 + rust/Cargo.toml | 4 ++ rust/build.rs | 45 ++++++++++++++++++++++ rust/src/lib.rs | 10 ++++- stubs/synapse/synapse_rust.pyi | 1 + synapse/__init__.py | 5 +++ synapse/util/rust.py | 84 ++++++++++++++++++++++++++++++++++++++++++ 7 files changed, 149 insertions(+), 1 deletion(-) create mode 100644 changelog.d/13759.misc create mode 100644 rust/build.rs create mode 100644 synapse/util/rust.py (limited to 'synapse') diff --git a/changelog.d/13759.misc b/changelog.d/13759.misc new file mode 100644 index 0000000000..f91c512483 --- /dev/null +++ b/changelog.d/13759.misc @@ -0,0 +1 @@ +Add a check for editable installs if the Rust library needs rebuilding. 
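One detail worth noting before the Rust diff below: the build script hashes every file under `rust/src` with 512-bit BLAKE2b and bakes the hex digest into the compiled module, and the Python helper recomputes the same hash for editable installs, so the two digests are only comparable because `hashlib.blake2b` also defaults to a 64-byte (512-bit) digest. A quick standard-library check of that size assumption:

from hashlib import blake2b

# hashlib's blake2b defaults to a 64-byte digest, matching the Blake2b512
# type used in rust/build.rs, so the hex digests can be compared directly.
hasher = blake2b()
hasher.update(b"fn main() {}\n")  # stand-in for the bytes of one source file
print(len(hasher.hexdigest()))    # 128 hex characters == 512 bits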
diff --git a/rust/Cargo.toml b/rust/Cargo.toml index 0a9760cafc..deddf3cec2 100644 --- a/rust/Cargo.toml +++ b/rust/Cargo.toml @@ -19,3 +19,7 @@ name = "synapse.synapse_rust" [dependencies] pyo3 = { version = "0.16.5", features = ["extension-module", "macros", "abi3", "abi3-py37"] } + +[build-dependencies] +blake2 = "0.10.4" +hex = "0.4.3" diff --git a/rust/build.rs b/rust/build.rs new file mode 100644 index 0000000000..2117975e56 --- /dev/null +++ b/rust/build.rs @@ -0,0 +1,45 @@ +//! This build script calculates the hash of all files in the `src/` +//! directory and adds it as an environment variable during build time. +//! +//! This is used so that the python code can detect when the built native module +//! does not match the source in-tree, helping to detect the case where the +//! source has been updated but the library hasn't been rebuilt. + +use std::path::PathBuf; + +use blake2::{Blake2b512, Digest}; + +fn main() -> Result<(), std::io::Error> { + let mut dirs = vec![PathBuf::from("src")]; + + let mut paths = Vec::new(); + while let Some(path) = dirs.pop() { + let mut entries = std::fs::read_dir(path)? + .map(|res| res.map(|e| e.path())) + .collect::, std::io::Error>>()?; + + entries.sort(); + + for entry in entries { + if entry.is_dir() { + dirs.push(entry) + } else { + paths.push(entry.to_str().expect("valid rust paths").to_string()); + } + } + } + + paths.sort(); + + let mut hasher = Blake2b512::new(); + + for path in paths { + let bytes = std::fs::read(path)?; + hasher.update(bytes); + } + + let hex_digest = hex::encode(hasher.finalize()); + println!("cargo:rustc-env=SYNAPSE_RUST_DIGEST={hex_digest}"); + + Ok(()) +} diff --git a/rust/src/lib.rs b/rust/src/lib.rs index 142fc2ed93..ba42465fb8 100644 --- a/rust/src/lib.rs +++ b/rust/src/lib.rs @@ -1,5 +1,13 @@ use pyo3::prelude::*; +/// Returns the hash of all the rust source files at the time it was compiled. +/// +/// Used by python to detect if the rust library is outdated. +#[pyfunction] +fn get_rust_file_digest() -> &'static str { + env!("SYNAPSE_RUST_DIGEST") +} + /// Formats the sum of two numbers as string. #[pyfunction] #[pyo3(text_signature = "(a, b, /)")] @@ -11,6 +19,6 @@ fn sum_as_string(a: usize, b: usize) -> PyResult { #[pymodule] fn synapse_rust(_py: Python<'_>, m: &PyModule) -> PyResult<()> { m.add_function(wrap_pyfunction!(sum_as_string, m)?)?; - + m.add_function(wrap_pyfunction!(get_rust_file_digest, m)?)?; Ok(()) } diff --git a/stubs/synapse/synapse_rust.pyi b/stubs/synapse/synapse_rust.pyi index 5b51ba05d7..8658d3138f 100644 --- a/stubs/synapse/synapse_rust.pyi +++ b/stubs/synapse/synapse_rust.pyi @@ -1 +1,2 @@ def sum_as_string(a: int, b: int) -> str: ... +def get_rust_file_digest() -> str: ... diff --git a/synapse/__init__.py b/synapse/__init__.py index b1369aca8f..1bed6393bd 100644 --- a/synapse/__init__.py +++ b/synapse/__init__.py @@ -20,6 +20,8 @@ import json import os import sys +from synapse.util.rust import check_rust_lib_up_to_date + # Check that we're not running on an unsupported Python version. if sys.version_info < (3, 7): print("Synapse requires Python 3.7 or above.") @@ -78,3 +80,6 @@ if bool(os.environ.get("SYNAPSE_TEST_PATCH_LOG_CONTEXTS", False)): from synapse.util.patch_inline_callbacks import do_patch do_patch() + + +check_rust_lib_up_to_date() diff --git a/synapse/util/rust.py b/synapse/util/rust.py new file mode 100644 index 0000000000..30ecb9ffd9 --- /dev/null +++ b/synapse/util/rust.py @@ -0,0 +1,84 @@ +# Copyright 2022 The Matrix.org Foundation C.I.C. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import sys +from hashlib import blake2b + +import synapse +from synapse.synapse_rust import get_rust_file_digest + + +def check_rust_lib_up_to_date() -> None: + """For editable installs check if the rust library is outdated and needs to + be rebuilt. + """ + + if not _dist_is_editable(): + return + + synapse_dir = os.path.dirname(synapse.__file__) + synapse_root = os.path.abspath(os.path.join(synapse_dir, "..")) + + # Double check we've not gone into site-packages... + if os.path.basename(synapse_root) == "site-packages": + return + + # ... and it looks like the root of a python project. + if not os.path.exists("pyproject.toml"): + return + + # Get the hash of all Rust source files + hash = _hash_rust_files_in_directory(os.path.join(synapse_root, "rust", "src")) + + if hash != get_rust_file_digest(): + raise Exception("Rust module outdated. Please rebuild using `poetry install`") + + +def _hash_rust_files_in_directory(directory: str) -> str: + """Get the hash of all files in a directory (recursively)""" + + directory = os.path.abspath(directory) + + paths = [] + + dirs = [directory] + while dirs: + dir = dirs.pop() + with os.scandir(dir) as d: + for entry in d: + if entry.is_dir(): + dirs.append(entry.path) + else: + paths.append(entry.path) + + # We sort to make sure that we get a consistent and well-defined ordering. + paths.sort() + + hasher = blake2b() + + for path in paths: + with open(os.path.join(directory, path), "rb") as f: + hasher.update(f.read()) + + return hasher.hexdigest() + + +def _dist_is_editable() -> bool: + """Is distribution an editable install?""" + for path_item in sys.path: + egg_link = os.path.join(path_item, "matrix-synapse.egg-link") + if os.path.isfile(egg_link): + return True + return False -- cgit 1.5.1 From da41a7cd618d11b05c2c04c39068fd4b1e1b7894 Mon Sep 17 00:00:00 2001 From: Nick Mills-Barrett Date: Mon, 12 Sep 2022 12:58:33 +0100 Subject: Remove check current state membership up to date (#13745) * Remove checks for membership column in current_state_events * Add schema script to force through the `current_state_events_membership` background job Contributed by Nick @ Beeper (@fizzadar). --- changelog.d/13745.misc | 1 + synapse/storage/databases/main/roommember.py | 202 +++++---------------- ...force_update_current_state_events_membership.py | 52 ++++++ 3 files changed, 100 insertions(+), 155 deletions(-) create mode 100644 changelog.d/13745.misc create mode 100644 synapse/storage/schema/main/delta/72/07force_update_current_state_events_membership.py (limited to 'synapse') diff --git a/changelog.d/13745.misc b/changelog.d/13745.misc new file mode 100644 index 0000000000..e97a789c0e --- /dev/null +++ b/changelog.d/13745.misc @@ -0,0 +1 @@ +Remove old queries to join room memberships to current state events. Contributed by Nick @ Beeper (@fizzadar). 
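The delta script added later in this commit forces the membership backfill through up front instead of waiting for the background update, which is what makes it safe for the queries below to read `current_state_events.membership` directly. In miniature (toy schema, purely illustrative; the real tables carry many more columns) the copy it performs looks like this:

import sqlite3

con = sqlite3.connect(":memory:")
con.executescript(
    """
    CREATE TABLE room_memberships (event_id TEXT PRIMARY KEY, membership TEXT);
    CREATE TABLE current_state_events (event_id TEXT PRIMARY KEY, membership TEXT);
    INSERT INTO room_memberships VALUES ('$a', 'join'), ('$b', 'leave');
    INSERT INTO current_state_events (event_id) VALUES ('$a'), ('$b');
    """
)
# Correlated-subquery UPDATE: copy membership across from room_memberships.
con.execute(
    """
    UPDATE current_state_events
    SET membership = (
        SELECT membership FROM room_memberships
        WHERE event_id = current_state_events.event_id
    )
    """
)
print(
    con.execute(
        "SELECT event_id, membership FROM current_state_events ORDER BY event_id"
    ).fetchall()
)  # [('$a', 'join'), ('$b', 'leave')]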
diff --git a/synapse/storage/databases/main/roommember.py b/synapse/storage/databases/main/roommember.py index 6e1ff5626b..fdb4684e12 100644 --- a/synapse/storage/databases/main/roommember.py +++ b/synapse/storage/databases/main/roommember.py @@ -32,10 +32,7 @@ import attr from synapse.api.constants import EventTypes, Membership from synapse.metrics import LaterGauge -from synapse.metrics.background_process_metrics import ( - run_as_background_process, - wrap_as_background_process, -) +from synapse.metrics.background_process_metrics import wrap_as_background_process from synapse.storage._base import SQLBaseStore, db_to_json, make_in_list_sql_clause from synapse.storage.database import ( DatabasePool, @@ -91,16 +88,6 @@ class RoomMemberWorkerStore(EventsWorkerStore): # at a time. Keyed by room_id. self._joined_host_linearizer = Linearizer("_JoinedHostsCache") - # Is the current_state_events.membership up to date? Or is the - # background update still running? - self._current_state_events_membership_up_to_date = False - - txn = db_conn.cursor( - txn_name="_check_safe_current_state_events_membership_updated" - ) - self._check_safe_current_state_events_membership_updated_txn(txn) - txn.close() - if ( self.hs.config.worker.run_background_tasks and self.hs.config.metrics.metrics_flags.known_servers @@ -157,34 +144,6 @@ class RoomMemberWorkerStore(EventsWorkerStore): self._known_servers_count = max([count, 1]) return self._known_servers_count - def _check_safe_current_state_events_membership_updated_txn( - self, txn: LoggingTransaction - ) -> None: - """Checks if it is safe to assume the new current_state_events - membership column is up to date - """ - - pending_update = self.db_pool.simple_select_one_txn( - txn, - table="background_updates", - keyvalues={"update_name": _CURRENT_STATE_MEMBERSHIP_UPDATE_NAME}, - retcols=["update_name"], - allow_none=True, - ) - - self._current_state_events_membership_up_to_date = not pending_update - - # If the update is still running, reschedule to run. - if pending_update: - self._clock.call_later( - 15.0, - run_as_background_process, - "_check_safe_current_state_events_membership_updated", - self.db_pool.runInteraction, - "_check_safe_current_state_events_membership_updated", - self._check_safe_current_state_events_membership_updated_txn, - ) - @cached(max_entries=100000, iterable=True) async def get_users_in_room(self, room_id: str) -> List[str]: """ @@ -212,31 +171,14 @@ class RoomMemberWorkerStore(EventsWorkerStore): `get_current_hosts_in_room()` and so we can re-use the cache but it's not horrible to have here either. """ - # If we can assume current_state_events.membership is up to date - # then we can avoid a join, which is a Very Good Thing given how - # frequently this function gets called. - if self._current_state_events_membership_up_to_date: - sql = """ - SELECT c.state_key FROM current_state_events as c - /* Get the depth of the event from the events table */ - INNER JOIN events AS e USING (event_id) - WHERE c.type = 'm.room.member' AND c.room_id = ? AND membership = ? - /* Sorted by lowest depth first */ - ORDER BY e.depth ASC; - """ - else: - sql = """ - SELECT c.state_key FROM room_memberships as m - /* Get the depth of the event from the events table */ - INNER JOIN events AS e USING (event_id) - INNER JOIN current_state_events as c - ON m.event_id = c.event_id - AND m.room_id = c.room_id - AND m.user_id = c.state_key - WHERE c.type = 'm.room.member' AND c.room_id = ? AND m.membership = ? 
- /* Sorted by lowest depth first */ - ORDER BY e.depth ASC; - """ + sql = """ + SELECT c.state_key FROM current_state_events as c + /* Get the depth of the event from the events table */ + INNER JOIN events AS e USING (event_id) + WHERE c.type = 'm.room.member' AND c.room_id = ? AND membership = ? + /* Sorted by lowest depth first */ + ORDER BY e.depth ASC; + """ txn.execute(sql, (room_id, Membership.JOIN)) return [r[0] for r in txn] @@ -353,28 +295,14 @@ class RoomMemberWorkerStore(EventsWorkerStore): # We do this all in one transaction to keep the cache small. # FIXME: get rid of this when we have room_stats - # If we can assume current_state_events.membership is up to date - # then we can avoid a join, which is a Very Good Thing given how - # frequently this function gets called. - if self._current_state_events_membership_up_to_date: - # Note, rejected events will have a null membership field, so - # we we manually filter them out. - sql = """ - SELECT count(*), membership FROM current_state_events - WHERE type = 'm.room.member' AND room_id = ? - AND membership IS NOT NULL - GROUP BY membership - """ - else: - sql = """ - SELECT count(*), m.membership FROM room_memberships as m - INNER JOIN current_state_events as c - ON m.event_id = c.event_id - AND m.room_id = c.room_id - AND m.user_id = c.state_key - WHERE c.type = 'm.room.member' AND c.room_id = ? - GROUP BY m.membership - """ + # Note, rejected events will have a null membership field, so + # we we manually filter them out. + sql = """ + SELECT count(*), membership FROM current_state_events + WHERE type = 'm.room.member' AND room_id = ? + AND membership IS NOT NULL + GROUP BY membership + """ txn.execute(sql, (room_id,)) res: Dict[str, MemberSummary] = {} @@ -383,30 +311,18 @@ class RoomMemberWorkerStore(EventsWorkerStore): # we order by membership and then fairly arbitrarily by event_id so # heroes are consistent - if self._current_state_events_membership_up_to_date: - # Note, rejected events will have a null membership field, so - # we we manually filter them out. - sql = """ - SELECT state_key, membership, event_id - FROM current_state_events - WHERE type = 'm.room.member' AND room_id = ? - AND membership IS NOT NULL - ORDER BY - CASE membership WHEN ? THEN 1 WHEN ? THEN 2 ELSE 3 END ASC, - event_id ASC - LIMIT ? - """ - else: - sql = """ - SELECT c.state_key, m.membership, c.event_id - FROM room_memberships as m - INNER JOIN current_state_events as c USING (room_id, event_id) - WHERE c.type = 'm.room.member' AND c.room_id = ? - ORDER BY - CASE m.membership WHEN ? THEN 1 WHEN ? THEN 2 ELSE 3 END ASC, - c.event_id ASC - LIMIT ? - """ + # Note, rejected events will have a null membership field, so + # we we manually filter them out. + sql = """ + SELECT state_key, membership, event_id + FROM current_state_events + WHERE type = 'm.room.member' AND room_id = ? + AND membership IS NOT NULL + ORDER BY + CASE membership WHEN ? THEN 1 WHEN ? THEN 2 ELSE 3 END ASC, + event_id ASC + LIMIT ? + """ # 6 is 5 (number of heroes) plus 1, in case one of them is the calling user. txn.execute(sql, (room_id, Membership.JOIN, Membership.INVITE, 6)) @@ -649,27 +565,15 @@ class RoomMemberWorkerStore(EventsWorkerStore): # We use `current_state_events` here and not `local_current_membership` # as a) this gets called with remote users and b) this only gets called # for rooms the server is participating in. 
- if self._current_state_events_membership_up_to_date: - sql = """ - SELECT room_id, e.instance_name, e.stream_ordering - FROM current_state_events AS c - INNER JOIN events AS e USING (room_id, event_id) - WHERE - c.type = 'm.room.member' - AND c.state_key = ? - AND c.membership = ? - """ - else: - sql = """ - SELECT room_id, e.instance_name, e.stream_ordering - FROM current_state_events AS c - INNER JOIN room_memberships AS m USING (room_id, event_id) - INNER JOIN events AS e USING (room_id, event_id) - WHERE - c.type = 'm.room.member' - AND c.state_key = ? - AND m.membership = ? - """ + sql = """ + SELECT room_id, e.instance_name, e.stream_ordering + FROM current_state_events AS c + INNER JOIN events AS e USING (room_id, event_id) + WHERE + c.type = 'm.room.member' + AND c.state_key = ? + AND c.membership = ? + """ txn.execute(sql, (user_id, Membership.JOIN)) return frozenset( @@ -707,27 +611,15 @@ class RoomMemberWorkerStore(EventsWorkerStore): user_ids, ) - if self._current_state_events_membership_up_to_date: - sql = f""" - SELECT c.state_key, room_id, e.instance_name, e.stream_ordering - FROM current_state_events AS c - INNER JOIN events AS e USING (room_id, event_id) - WHERE - c.type = 'm.room.member' - AND c.membership = ? - AND {clause} - """ - else: - sql = f""" - SELECT c.state_key, room_id, e.instance_name, e.stream_ordering - FROM current_state_events AS c - INNER JOIN room_memberships AS m USING (room_id, event_id) - INNER JOIN events AS e USING (room_id, event_id) - WHERE - c.type = 'm.room.member' - AND m.membership = ? - AND {clause} - """ + sql = f""" + SELECT c.state_key, room_id, e.instance_name, e.stream_ordering + FROM current_state_events AS c + INNER JOIN events AS e USING (room_id, event_id) + WHERE + c.type = 'm.room.member' + AND c.membership = ? + AND {clause} + """ txn.execute(sql, [Membership.JOIN] + args) diff --git a/synapse/storage/schema/main/delta/72/07force_update_current_state_events_membership.py b/synapse/storage/schema/main/delta/72/07force_update_current_state_events_membership.py new file mode 100644 index 0000000000..b5853d125c --- /dev/null +++ b/synapse/storage/schema/main/delta/72/07force_update_current_state_events_membership.py @@ -0,0 +1,52 @@ +# Copyright 2022 Beeper +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +""" +Forces through the `current_state_events_membership` background job so checks +for its completion can be removed. + +Note the background job must still remain defined in the database class. +""" + + +def run_upgrade(cur, database_engine, *args, **kwargs): + cur.execute("SELECT update_name FROM background_updates") + rows = cur.fetchall() + for row in rows: + if row[0] == "current_state_events_membership": + break + # No pending background job so nothing to do here + else: + return + + # Populate membership field for all current_state_events, this may take + # a while but was originally handled via a background update in 2019. 
+ cur.execute( + """ + UPDATE current_state_events + SET membership = ( + SELECT membership FROM room_memberships + WHERE event_id = current_state_events.event_id + ) + """ + ) + + # Finally, delete the background job because we've handled it above + cur.execute( + """ + DELETE FROM background_updates + WHERE update_name = 'current_state_events_membership' + """ + ) -- cgit 1.5.1 From cdbb6412327b542e0dead792717fe58253291131 Mon Sep 17 00:00:00 2001 From: Nick Mills-Barrett Date: Tue, 13 Sep 2022 08:16:37 +0100 Subject: Add receipts event stream ordering (#13703) --- changelog.d/13703.misc | 1 + synapse/_scripts/synapse_port_db.py | 2 + synapse/storage/databases/main/receipts.py | 74 +++++++++++++++++++++- .../delta/72/05receipts_event_stream_ordering.sql | 19 ++++++ 4 files changed, 95 insertions(+), 1 deletion(-) create mode 100644 changelog.d/13703.misc create mode 100644 synapse/storage/schema/main/delta/72/05receipts_event_stream_ordering.sql (limited to 'synapse') diff --git a/changelog.d/13703.misc b/changelog.d/13703.misc new file mode 100644 index 0000000000..685a29b17d --- /dev/null +++ b/changelog.d/13703.misc @@ -0,0 +1 @@ +Add & populate `event_stream_ordering` column on receipts table for future optimisation of push action processing. Contributed by Nick @ Beeper (@fizzadar). diff --git a/synapse/_scripts/synapse_port_db.py b/synapse/_scripts/synapse_port_db.py index 543bba27c2..30983c47fb 100755 --- a/synapse/_scripts/synapse_port_db.py +++ b/synapse/_scripts/synapse_port_db.py @@ -67,6 +67,7 @@ from synapse.storage.databases.main.media_repository import ( ) from synapse.storage.databases.main.presence import PresenceBackgroundUpdateStore from synapse.storage.databases.main.pusher import PusherWorkerStore +from synapse.storage.databases.main.receipts import ReceiptsBackgroundUpdateStore from synapse.storage.databases.main.registration import ( RegistrationBackgroundUpdateStore, find_max_generated_user_id_localpart, @@ -203,6 +204,7 @@ class Store( PushRuleStore, PusherWorkerStore, PresenceBackgroundUpdateStore, + ReceiptsBackgroundUpdateStore, ): def execute(self, f: Callable[..., R], *args: Any, **kwargs: Any) -> Awaitable[R]: return self.db_pool.runInteraction(f.__name__, f, *args, **kwargs) diff --git a/synapse/storage/databases/main/receipts.py b/synapse/storage/databases/main/receipts.py index 3838409519..719a12b0ae 100644 --- a/synapse/storage/databases/main/receipts.py +++ b/synapse/storage/databases/main/receipts.py @@ -675,6 +675,7 @@ class ReceiptsWorkerStore(SQLBaseStore): values={ "stream_id": stream_id, "event_id": event_id, + "event_stream_ordering": stream_ordering, "data": json_encoder.encode(data), }, # receipts_linearized has a unique constraint on @@ -830,5 +831,76 @@ class ReceiptsWorkerStore(SQLBaseStore): ) -class ReceiptsStore(ReceiptsWorkerStore): +class ReceiptsBackgroundUpdateStore(SQLBaseStore): + POPULATE_RECEIPT_EVENT_STREAM_ORDERING = "populate_event_stream_ordering" + + def __init__( + self, + database: DatabasePool, + db_conn: LoggingDatabaseConnection, + hs: "HomeServer", + ): + super().__init__(database, db_conn, hs) + + self.db_pool.updates.register_background_update_handler( + self.POPULATE_RECEIPT_EVENT_STREAM_ORDERING, + self._populate_receipt_event_stream_ordering, + ) + + async def _populate_receipt_event_stream_ordering( + self, progress: JsonDict, batch_size: int + ) -> int: + def _populate_receipt_event_stream_ordering_txn( + txn: LoggingTransaction, + ) -> bool: + + if "max_stream_id" in progress: + max_stream_id = 
progress["max_stream_id"] + else: + txn.execute("SELECT max(stream_id) FROM receipts_linearized") + res = txn.fetchone() + if res is None or res[0] is None: + return True + else: + max_stream_id = res[0] + + start = progress.get("stream_id", 0) + stop = start + batch_size + + sql = """ + UPDATE receipts_linearized + SET event_stream_ordering = ( + SELECT stream_ordering + FROM events + WHERE event_id = receipts_linearized.event_id + ) + WHERE stream_id >= ? AND stream_id < ? + """ + txn.execute(sql, (start, stop)) + + self.db_pool.updates._background_update_progress_txn( + txn, + self.POPULATE_RECEIPT_EVENT_STREAM_ORDERING, + { + "stream_id": stop, + "max_stream_id": max_stream_id, + }, + ) + + return stop > max_stream_id + + finished = await self.db_pool.runInteraction( + "_remove_devices_from_device_inbox_txn", + _populate_receipt_event_stream_ordering_txn, + ) + + if finished: + await self.db_pool.updates._end_background_update( + self.POPULATE_RECEIPT_EVENT_STREAM_ORDERING + ) + + return batch_size + + +class ReceiptsStore(ReceiptsWorkerStore, ReceiptsBackgroundUpdateStore): pass diff --git a/synapse/storage/schema/main/delta/72/05receipts_event_stream_ordering.sql b/synapse/storage/schema/main/delta/72/05receipts_event_stream_ordering.sql new file mode 100644 index 0000000000..2a822f4509 --- /dev/null +++ b/synapse/storage/schema/main/delta/72/05receipts_event_stream_ordering.sql @@ -0,0 +1,19 @@ +/* Copyright 2022 Beeper + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +ALTER TABLE receipts_linearized ADD COLUMN event_stream_ordering BIGINT; + +INSERT INTO background_updates (update_name, progress_json) VALUES + ('populate_event_stream_ordering', '{}'); -- cgit 1.5.1 From b60d47ab2c55580fc1941497964cd33c27838231 Mon Sep 17 00:00:00 2001 From: David Robertson Date: Tue, 13 Sep 2022 10:53:11 +0100 Subject: Updates to the schema dump script (#13770) --- changelog.d/13770.misc | 1 + scripts-dev/make_full_schema.sh | 48 ++++++++-------------- .../storage/schema/state/delta/30/state_stream.sql | 4 ++ 3 files changed, 21 insertions(+), 32 deletions(-) create mode 100644 changelog.d/13770.misc (limited to 'synapse') diff --git a/changelog.d/13770.misc b/changelog.d/13770.misc new file mode 100644 index 0000000000..36ac91400a --- /dev/null +++ b/changelog.d/13770.misc @@ -0,0 +1 @@ +Update the script which makes full schema dumps. 
diff --git a/scripts-dev/make_full_schema.sh b/scripts-dev/make_full_schema.sh index f0e22d4ca2..61394360ce 100755 --- a/scripts-dev/make_full_schema.sh +++ b/scripts-dev/make_full_schema.sh @@ -9,8 +9,10 @@ export PGHOST="localhost" POSTGRES_DB_NAME="synapse_full_schema.$$" -SQLITE_FULL_SCHEMA_OUTPUT_FILE="full.sql.sqlite" -POSTGRES_FULL_SCHEMA_OUTPUT_FILE="full.sql.postgres" +SQLITE_SCHEMA_FILE="schema.sql.sqlite" +SQLITE_ROWS_FILE="rows.sql.sqlite" +POSTGRES_SCHEMA_FILE="full.sql.postgres" +POSTGRES_ROWS_FILE="rows.sql.postgres" REQUIRED_DEPS=("matrix-synapse" "psycopg2") @@ -22,7 +24,7 @@ usage() { echo " Username to connect to local postgres instance. The password will be requested" echo " during script execution." echo "-c" - echo " CI mode. Enables coverage tracking and prints every command that the script runs." + echo " CI mode. Prints every command that the script runs." echo "-o " echo " Directory to output full schema files to." echo "-h" @@ -37,11 +39,6 @@ while getopts "p:co:h" opt; do c) # Print all commands that are being executed set -x - - # Modify required dependencies for coverage - REQUIRED_DEPS+=("coverage" "coverage-enable-subprocess") - - COVERAGE=1 ;; o) command -v realpath > /dev/null || (echo "The -o flag requires the 'realpath' binary to be installed" && exit 1) @@ -102,6 +99,7 @@ SQLITE_DB=$TMPDIR/homeserver.db POSTGRES_CONFIG=$TMPDIR/postgres.conf # Ensure these files are delete on script exit +# TODO: the trap should also drop the temp postgres DB trap 'rm -rf $TMPDIR' EXIT cat > "$SQLITE_CONFIG" < "$OUTPUT_DIR/$SQLITE_FULL_SCHEMA_OUTPUT_FILE" +echo "Dumping SQLite3 schema to '$OUTPUT_DIR/$SQLITE_SCHEMA_FILE' and '$OUTPUT_DIR/$SQLITE_ROWS_FILE'..." +sqlite3 "$SQLITE_DB" ".schema --indent" > "$OUTPUT_DIR/$SQLITE_SCHEMA_FILE" +sqlite3 "$SQLITE_DB" ".dump --data-only --nosys" > "$OUTPUT_DIR/$SQLITE_ROWS_FILE" -echo "Dumping Postgres schema to '$OUTPUT_DIR/$POSTGRES_FULL_SCHEMA_OUTPUT_FILE'..." -pg_dump --format=plain --no-tablespaces --no-acl --no-owner $POSTGRES_DB_NAME | sed -e '/^--/d' -e 's/public\.//g' -e '/^SET /d' -e '/^SELECT /d' > "$OUTPUT_DIR/$POSTGRES_FULL_SCHEMA_OUTPUT_FILE" +echo "Dumping Postgres schema to '$OUTPUT_DIR/$POSTGRES_SCHEMA_FILE' and '$OUTPUT_DIR/$POSTGRES_ROWS_FILE'..." +pg_dump --format=plain --schema-only --no-tablespaces --no-acl --no-owner "$POSTGRES_DB_NAME" | sed -e '/^$/d' -e '/^--/d' -e 's/public\.//g' -e '/^SET /d' -e '/^SELECT /d' > "$OUTPUT_DIR/$POSTGRES_SCHEMA_FILE" +pg_dump --format=plain --data-only --inserts --no-tablespaces --no-acl --no-owner "$POSTGRES_DB_NAME" | sed -e '/^$/d' -e '/^--/d' -e 's/public\.//g' -e '/^SET /d' -e '/^SELECT /d' > "$OUTPUT_DIR/$POSTGRES_ROWS_FILE" echo "Cleaning up temporary Postgres database..." dropdb $POSTGRES_DB_NAME diff --git a/synapse/storage/schema/state/delta/30/state_stream.sql b/synapse/storage/schema/state/delta/30/state_stream.sql index e85699e82e..bdaf8b02d5 100644 --- a/synapse/storage/schema/state/delta/30/state_stream.sql +++ b/synapse/storage/schema/state/delta/30/state_stream.sql @@ -26,6 +26,10 @@ * (event, state) pair, we can use that stream_ordering to identify when * the new state was assigned for the event. */ + +/* NB: This table belongs to the `main` logical database; it should not be present + * in `state`. 
+ */ CREATE TABLE IF NOT EXISTS ex_outlier_stream( event_stream_ordering BIGINT PRIMARY KEY NOT NULL, event_id TEXT NOT NULL, -- cgit 1.5.1 From 12dacecabd27680dc77c17724953ecda0801b5ea Mon Sep 17 00:00:00 2001 From: Mathieu Velten Date: Tue, 13 Sep 2022 16:14:28 +0200 Subject: Make sequence `cache_invalidation_stream_seq` begin at `2` (#13766) Signed-off-by: Mathieu Velten Co-authored-by: Sean Quah <8349537+squahtx@users.noreply.github.com> --- changelog.d/13766.bugfix | 1 + synapse/storage/schema/__init__.py | 1 + ...8begin_cache_invalidation_seq_at_2.sql.postgres | 23 ++++++++++++++++++++++ 3 files changed, 25 insertions(+) create mode 100644 changelog.d/13766.bugfix create mode 100644 synapse/storage/schema/main/delta/72/08begin_cache_invalidation_seq_at_2.sql.postgres (limited to 'synapse') diff --git a/changelog.d/13766.bugfix b/changelog.d/13766.bugfix new file mode 100644 index 0000000000..c708e54f9c --- /dev/null +++ b/changelog.d/13766.bugfix @@ -0,0 +1 @@ +Fix a long-standing bug where the `cache_invalidation_stream_seq` sequence would begin at 1 instead of 2. diff --git a/synapse/storage/schema/__init__.py b/synapse/storage/schema/__init__.py index 256f745dc0..32cda5e3ba 100644 --- a/synapse/storage/schema/__init__.py +++ b/synapse/storage/schema/__init__.py @@ -76,6 +76,7 @@ Changes in SCHEMA_VERSION = 72: - event_edges.(room_id, is_state) are no longer written to. - Tables related to groups are dropped. - Unused column application_services_state.last_txn is dropped + - Cache invalidation stream id sequence now begins at 2 to match code expectation. """ diff --git a/synapse/storage/schema/main/delta/72/08begin_cache_invalidation_seq_at_2.sql.postgres b/synapse/storage/schema/main/delta/72/08begin_cache_invalidation_seq_at_2.sql.postgres new file mode 100644 index 0000000000..69931fe971 --- /dev/null +++ b/synapse/storage/schema/main/delta/72/08begin_cache_invalidation_seq_at_2.sql.postgres @@ -0,0 +1,23 @@ +/* Copyright 2022 The Matrix.org Foundation C.I.C + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + + +-- The sequence needs to begin at 2 because a bunch of code assumes that +-- get_next_id_txn will return values >= 2, cf this comment: +-- https://github.com/matrix-org/synapse/blob/b93bd95e8ab64d27ae26841020f62ee61272a5f2/synapse/storage/util/id_generators.py#L344 + +SELECT setval('cache_invalidation_stream_seq', ( + SELECT COALESCE(MAX(last_value), 1) FROM cache_invalidation_stream_seq +)); -- cgit 1.5.1 From b77d49f4410ceb42880af2882e70b3b3164955cc Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 13 Sep 2022 16:29:51 -0500 Subject: Hopefully fix problem when OTEL not installed with non recording span ``` Traceback (most recent call last): File "/usr/local/lib/python3.9/site-packages/synapse/http/server.py", line 306, in _async_render_wrapper callback_return = await self._async_render(request) File "/usr/local/lib/python3.9/site-packages/synapse/http/server.py", line 512, in _async_render callback_return = await raw_callback_return File "/usr/local/lib/python3.9/site-packages/synapse/federation/transport/server/_base.py", line 357, in new_func remote_parent_span = create_non_recording_span() File "/usr/local/lib/python3.9/site-packages/synapse/logging/tracing.py", line 502, in create_non_recording_span return opentelemetry.trace.NonRecordingSpan( AttributeError: 'NoneType' object has no attribute 'trace' ``` --- synapse/federation/transport/server/_base.py | 1 + synapse/logging/tracing.py | 37 +++++++++++++++------------ synapse/metrics/background_process_metrics.py | 2 +- 3 files changed, 23 insertions(+), 17 deletions(-) (limited to 'synapse') diff --git a/synapse/federation/transport/server/_base.py b/synapse/federation/transport/server/_base.py index 441ff8ce71..cba690e795 100644 --- a/synapse/federation/transport/server/_base.py +++ b/synapse/federation/transport/server/_base.py @@ -320,6 +320,7 @@ class BaseFederationServlet: if origin and whitelisted_homeserver(origin): origin_context = context_from_request(request) + remote_parent_span = None if origin_context: local_servlet_span = get_active_span() # Create a span which uses the `origin_context` as a parent diff --git a/synapse/logging/tracing.py b/synapse/logging/tracing.py index 2fe9875398..b92c1d96e3 100644 --- a/synapse/logging/tracing.py +++ b/synapse/logging/tracing.py @@ -487,17 +487,19 @@ def whitelisted_homeserver(destination: str) -> bool: def use_span( - span: "opentelemetry.trace.Span", + span: Optional["opentelemetry.trace.Span"], end_on_exit: bool = True, -) -> ContextManager["opentelemetry.trace.Span"]: - if opentelemetry is None: - return contextlib.nullcontext() # type: ignore[unreachable] +) -> ContextManager[Optional["opentelemetry.trace.Span"]]: + if opentelemetry is None or span is None: + return contextlib.nullcontext() return opentelemetry.trace.use_span(span=span, end_on_exit=end_on_exit) -def create_non_recording_span() -> "opentelemetry.trace.Span": +def create_non_recording_span() -> Optional["opentelemetry.trace.Span"]: """Create a no-op span that does not record or become part of a recorded trace""" + if opentelemetry is None: + return None # type: ignore[unreachable] return opentelemetry.trace.NonRecordingSpan( opentelemetry.trace.INVALID_SPAN_CONTEXT @@ -554,7 +556,7 @@ def start_active_span( end_on_exit: bool = True, # For testing only tracer: Optional["opentelemetry.trace.Tracer"] = None, -) -> ContextManager["opentelemetry.trace.Span"]: +) -> ContextManager[Optional["opentelemetry.trace.Span"]]: if opentelemetry is None: return contextlib.nullcontext() # type: 
ignore[unreachable] @@ -588,7 +590,7 @@ def start_active_span_from_edu( operation_name: str, *, edu_content: Dict[str, Any], -) -> ContextManager["opentelemetry.trace.Span"]: +) -> ContextManager[Optional["opentelemetry.trace.Span"]]: """ Extracts a span context from an edu and uses it to start a new active span @@ -1003,17 +1005,20 @@ def trace_servlet( # finished sending the response to the client. end_on_exit=False, ) as span: - request.set_tracing_span(span) + if span: + request.set_tracing_span(span) inject_trace_id_into_response_headers(request.responseHeaders) try: yield finally: - # We set the operation name again in case its changed (which happens - # with JsonResource). - span.update_name(request.request_metrics.name) - - if request.request_metrics.start_context.tag is not None: - span.set_attribute( - SynapseTags.REQUEST_TAG, request.request_metrics.start_context.tag - ) + if span: + # We set the operation name again in case its changed (which happens + # with JsonResource). + span.update_name(request.request_metrics.name) + + if request.request_metrics.start_context.tag is not None: + span.set_attribute( + SynapseTags.REQUEST_TAG, + request.request_metrics.start_context.tag, + ) diff --git a/synapse/metrics/background_process_metrics.py b/synapse/metrics/background_process_metrics.py index 59d956dd9d..ad639fe223 100644 --- a/synapse/metrics/background_process_metrics.py +++ b/synapse/metrics/background_process_metrics.py @@ -236,7 +236,7 @@ def run_as_background_process( attributes={SynapseTags.REQUEST_ID: str(context)}, ) else: - ctx = nullcontext() # type: ignore[assignment] + ctx = nullcontext() with ctx: return await func(*args, **kwargs) except Exception: -- cgit 1.5.1 From a027c6e9fe1122e9279f9158386a4e218d8f9433 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 14 Sep 2022 01:59:37 -0500 Subject: Maybe fix positional argument mismatch for DummyLink Hopefully fix: ``` File "/home/runner/work/synapse/synapse/synapse/storage/controllers/persist_events.py", line 246, in add_to_queue links=[Link(end_item.tracing_span_context)], builtins.TypeError: __init__() takes 1 positional argument but 2 were given ``` --- synapse/logging/tracing.py | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) (limited to 'synapse') diff --git a/synapse/logging/tracing.py b/synapse/logging/tracing.py index b92c1d96e3..d594ab735b 100644 --- a/synapse/logging/tracing.py +++ b/synapse/logging/tracing.py @@ -216,7 +216,7 @@ class _DummyLookup(object): class DummyLink(ABC): """Dummy placeholder for `opentelemetry.trace.Link`""" - def __init__(self) -> None: + def __init__(self, *args: Any) -> None: self.not_implemented_message = ( "opentelemetry isn't installed so this is just a dummy link placeholder" ) @@ -547,7 +547,7 @@ def start_active_span( name: str, *, context: Optional["opentelemetry.context.context.Context"] = None, - kind: Optional["opentelemetry.trace.SpanKind"] = SpanKind.INTERNAL, + kind: "opentelemetry.trace.SpanKind" = SpanKind.INTERNAL, attributes: "opentelemetry.util.types.Attributes" = None, links: Optional[Sequence["opentelemetry.trace.Link"]] = None, start_time: Optional[int] = None, @@ -560,11 +560,6 @@ def start_active_span( if opentelemetry is None: return contextlib.nullcontext() # type: ignore[unreachable] - # TODO: Why is this necessary to satisfy this error? It has a default? 
- # ` error: Argument "kind" to "start_span" of "Tracer" has incompatible type "Optional[SpanKind]"; expected "SpanKind" [arg-type]` - if kind is None: - kind = SpanKind.INTERNAL - span = start_span( name=name, context=context, -- cgit 1.5.1 From 21687ec189f404bcee98ae61b008afc8c5094400 Mon Sep 17 00:00:00 2001 From: reivilibre Date: Wed, 14 Sep 2022 08:28:12 +0000 Subject: Fix a long-standing spec compliance bug where Synapse would accept a trailing slash on the end of `/get_missing_events` federation requests. (#13789) * Don't accept a trailing slash on the end of /get_missing_events * Newsfile Signed-off-by: Olivier Wilkinson (reivilibre) Signed-off-by: Olivier Wilkinson (reivilibre) --- changelog.d/13789.bugfix | 1 + synapse/federation/transport/server/federation.py | 3 +-- 2 files changed, 2 insertions(+), 2 deletions(-) create mode 100644 changelog.d/13789.bugfix (limited to 'synapse') diff --git a/changelog.d/13789.bugfix b/changelog.d/13789.bugfix new file mode 100644 index 0000000000..9e1e3e0fa7 --- /dev/null +++ b/changelog.d/13789.bugfix @@ -0,0 +1 @@ +Fix a long-standing spec compliance bug where Synapse would accept a trailing slash on the end of `/get_missing_events` federation requests. \ No newline at end of file diff --git a/synapse/federation/transport/server/federation.py b/synapse/federation/transport/server/federation.py index f7884bfbe0..6bb4659c4c 100644 --- a/synapse/federation/transport/server/federation.py +++ b/synapse/federation/transport/server/federation.py @@ -549,8 +549,7 @@ class FederationClientKeysClaimServlet(BaseFederationServerServlet): class FederationGetMissingEventsServlet(BaseFederationServerServlet): - # TODO(paul): Why does this path alone end with "/?" optional? - PATH = "/get_missing_events/(?P[^/]*)/?" + PATH = "/get_missing_events/(?P[^/]*)" async def on_POST( self, -- cgit 1.5.1 From c73774467edb04c372caecb9e843542654f7610b Mon Sep 17 00:00:00 2001 From: Sean Quah <8349537+squahtx@users.noreply.github.com> Date: Wed, 14 Sep 2022 10:42:57 +0100 Subject: Fix bug in device list caching when remote users leave rooms (#13749) When a remote user leaves the last room shared with the homeserver, we have to mark their device list as unsubscribed, otherwise we would hold on to a stale device list in our cache. Crucially, the device list would remain cached even after the remote user rejoined the room, which could lead to E2EE failures until the next change to the remote user's device list. Fixes #13651. Signed-off-by: Sean Quah --- changelog.d/13749.bugfix | 1 + synapse/handlers/device.py | 11 ----------- synapse/handlers/e2e_keys.py | 26 ++++++++++++++++++++++++++ synapse/storage/controllers/persist_events.py | 20 +++++++++++++++++--- tests/handlers/test_e2e_keys.py | 8 +++++++- 5 files changed, 51 insertions(+), 15 deletions(-) create mode 100644 changelog.d/13749.bugfix (limited to 'synapse') diff --git a/changelog.d/13749.bugfix b/changelog.d/13749.bugfix new file mode 100644 index 0000000000..8ffafec07b --- /dev/null +++ b/changelog.d/13749.bugfix @@ -0,0 +1 @@ +Fix a long standing bug where device lists would remain cached when remote users left and rejoined the last room shared with the local homeserver. 
diff --git a/synapse/handlers/device.py b/synapse/handlers/device.py index c5ac169644..901e2310b7 100644 --- a/synapse/handlers/device.py +++ b/synapse/handlers/device.py @@ -45,7 +45,6 @@ from synapse.types import ( JsonDict, StreamKeyType, StreamToken, - UserID, get_domain_from_id, get_verify_key_from_cross_signing_key, ) @@ -324,8 +323,6 @@ class DeviceHandler(DeviceWorkerHandler): self.device_list_updater.incoming_device_list_update, ) - hs.get_distributor().observe("user_left_room", self.user_left_room) - # Whether `_handle_new_device_update_async` is currently processing. self._handle_new_device_update_is_processing = False @@ -569,14 +566,6 @@ class DeviceHandler(DeviceWorkerHandler): StreamKeyType.DEVICE_LIST, position, users=[from_user_id] ) - async def user_left_room(self, user: UserID, room_id: str) -> None: - user_id = user.to_string() - room_ids = await self.store.get_rooms_for_user(user_id) - if not room_ids: - # We no longer share rooms with this user, so we'll no longer - # receive device updates. Mark this in DB. - await self.store.mark_remote_user_device_list_as_unsubscribed(user_id) - async def store_dehydrated_device( self, user_id: str, diff --git a/synapse/handlers/e2e_keys.py b/synapse/handlers/e2e_keys.py index ec81639c78..8eed63ccf3 100644 --- a/synapse/handlers/e2e_keys.py +++ b/synapse/handlers/e2e_keys.py @@ -175,6 +175,32 @@ class E2eKeysHandler: user_ids_not_in_cache, remote_results, ) = await self.store.get_user_devices_from_cache(query_list) + + # Check that the homeserver still shares a room with all cached users. + # Note that this check may be slightly racy when a remote user leaves a + # room after we have fetched their cached device list. In the worst case + # we will do extra federation queries for devices that we had cached. + cached_users = set(remote_results.keys()) + valid_cached_users = ( + await self.store.get_users_server_still_shares_room_with( + remote_results.keys() + ) + ) + invalid_cached_users = cached_users - valid_cached_users + if invalid_cached_users: + # Fix up results. If we get here, there is either a bug in device + # list tracking, or we hit the race mentioned above. + user_ids_not_in_cache.update(invalid_cached_users) + for invalid_user_id in invalid_cached_users: + remote_results.pop(invalid_user_id) + # This log message may be removed if it turns out it's almost + # entirely triggered by races. + logger.error( + "Devices for %s were cached, but the server no longer shares " + "any rooms with them. The cached device lists are stale.", + invalid_cached_users, + ) + for user_id, devices in remote_results.items(): user_devices = results.setdefault(user_id, {}) for device_id, device in devices.items(): diff --git a/synapse/storage/controllers/persist_events.py b/synapse/storage/controllers/persist_events.py index dad3731b9b..501dbbc990 100644 --- a/synapse/storage/controllers/persist_events.py +++ b/synapse/storage/controllers/persist_events.py @@ -598,9 +598,9 @@ class EventsPersistenceStorageController: # room state_delta_for_room: Dict[str, DeltaState] = {} - # Set of remote users which were in rooms the server has left. We - # should check if we still share any rooms and if not we mark their - # device lists as stale. + # Set of remote users which were in rooms the server has left or who may + # have left rooms the server is in. We should check if we still share any + # rooms and if not we mark their device lists as stale. 
potentially_left_users: Set[str] = set() if not backfilled: @@ -725,6 +725,20 @@ class EventsPersistenceStorageController: current_state = {} delta.no_longer_in_room = True + # Add all remote users that might have left rooms. + potentially_left_users.update( + user_id + for event_type, user_id in delta.to_delete + if event_type == EventTypes.Member + and not self.is_mine_id(user_id) + ) + potentially_left_users.update( + user_id + for event_type, user_id in delta.to_insert.keys() + if event_type == EventTypes.Member + and not self.is_mine_id(user_id) + ) + state_delta_for_room[room_id] = delta await self.persist_events_store._persist_events_and_state_updates( diff --git a/tests/handlers/test_e2e_keys.py b/tests/handlers/test_e2e_keys.py index 1e6ad4b663..95698bc275 100644 --- a/tests/handlers/test_e2e_keys.py +++ b/tests/handlers/test_e2e_keys.py @@ -891,6 +891,12 @@ class E2eKeysHandlerTestCase(unittest.HomeserverTestCase): new_callable=mock.MagicMock, return_value=make_awaitable(["some_room_id"]), ) + mock_get_users = mock.patch.object( + self.store, + "get_users_server_still_shares_room_with", + new_callable=mock.MagicMock, + return_value=make_awaitable({remote_user_id}), + ) mock_request = mock.patch.object( self.hs.get_federation_client(), "query_user_devices", @@ -898,7 +904,7 @@ class E2eKeysHandlerTestCase(unittest.HomeserverTestCase): return_value=make_awaitable(response_body), ) - with mock_get_rooms, mock_request as mocked_federation_request: + with mock_get_rooms, mock_get_users, mock_request as mocked_federation_request: # Make the first query and sanity check it succeeds. response_1 = self.get_success( e2e_handler.query_devices( -- cgit 1.5.1 From 51a77e990b7a59e460ab22a2788ab8c3506b9a2c Mon Sep 17 00:00:00 2001 From: David Robertson Date: Wed, 14 Sep 2022 14:16:12 +0100 Subject: Remove incorrect migration file from `state` logical DB (#13788) * Remove incorrect migration file from `state` logical DB The table `ex_outlier_stream` is part of the `main` logical DB; it should not have been created in the `state` logical DB. We remove this migration now as a tidy-up. Note: we cannot `DROP TABLE IF EXISTS ex_outlier_stream` in a new migration, because some (most) instances of Synapse host both of these logical DBs on the same DB cluster. * Changelog --- changelog.d/13788.misc | 1 + .../storage/schema/state/delta/30/state_stream.sql | 37 ---------------------- 2 files changed, 1 insertion(+), 37 deletions(-) create mode 100644 changelog.d/13788.misc delete mode 100644 synapse/storage/schema/state/delta/30/state_stream.sql (limited to 'synapse') diff --git a/changelog.d/13788.misc b/changelog.d/13788.misc new file mode 100644 index 0000000000..7263b1ac52 --- /dev/null +++ b/changelog.d/13788.misc @@ -0,0 +1 @@ +Remove an old, incorrect migration file. diff --git a/synapse/storage/schema/state/delta/30/state_stream.sql b/synapse/storage/schema/state/delta/30/state_stream.sql deleted file mode 100644 index bdaf8b02d5..0000000000 --- a/synapse/storage/schema/state/delta/30/state_stream.sql +++ /dev/null @@ -1,37 +0,0 @@ -/* Copyright 2016 OpenMarket Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -/* We used to create a table called current_state_resets, but this is no - * longer used and is removed in delta 54. - */ - -/* The outlier events that have aquired a state group typically through - * backfill. This is tracked separately to the events table, as assigning a - * state group change the position of the existing event in the stream - * ordering. - * However since a stream_ordering is assigned in persist_event for the - * (event, state) pair, we can use that stream_ordering to identify when - * the new state was assigned for the event. - */ - -/* NB: This table belongs to the `main` logical database; it should not be present - * in `state`. - */ -CREATE TABLE IF NOT EXISTS ex_outlier_stream( - event_stream_ordering BIGINT PRIMARY KEY NOT NULL, - event_id TEXT NOT NULL, - state_group BIGINT NOT NULL -); -- cgit 1.5.1 From eaed4e6113f5ed40056fa02ae922cb273d02be6e Mon Sep 17 00:00:00 2001 From: Quentin Gliech Date: Wed, 14 Sep 2022 16:33:54 +0200 Subject: Remove unused method in `synapse.api.auth.Auth`. (#13795) Clean-up from b19060a29b4f73897847db2aba5d03ec819086e0 (#13094) and 73af10f419346a5f2d70131ac1ed8e69942edca0 (#13093) which removed all callers. --- changelog.d/13795.misc | 1 + synapse/api/auth.py | 9 --------- 2 files changed, 1 insertion(+), 9 deletions(-) create mode 100644 changelog.d/13795.misc (limited to 'synapse') diff --git a/changelog.d/13795.misc b/changelog.d/13795.misc new file mode 100644 index 0000000000..20d90cc130 --- /dev/null +++ b/changelog.d/13795.misc @@ -0,0 +1 @@ +Remove unused method in `synapse.api.auth.Auth`. diff --git a/synapse/api/auth.py b/synapse/api/auth.py index 4a75eb6b21..3d7f986ac7 100644 --- a/synapse/api/auth.py +++ b/synapse/api/auth.py @@ -459,15 +459,6 @@ class Auth: ) raise InvalidClientTokenError("Invalid access token passed.") - def get_appservice_by_req(self, request: SynapseRequest) -> ApplicationService: - token = self.get_access_token_from_request(request) - service = self.store.get_app_service_by_token(token) - if not service: - logger.warning("Unrecognised appservice access token.") - raise InvalidClientTokenError() - request.requester = create_requester(service.sender, app_service=service) - return service - async def is_server_admin(self, requester: Requester) -> bool: """Check if the given user is a local server admin. -- cgit 1.5.1 From cf65433de26ecce551c64e56d9ee8435c99defab Mon Sep 17 00:00:00 2001 From: reivilibre Date: Wed, 14 Sep 2022 15:29:05 +0000 Subject: Fix a memory leak when running the unit tests. (#13798) --- changelog.d/13798.misc | 1 + synapse/util/caches/__init__.py | 3 ++- synapse/util/metrics.py | 10 +++++----- 3 files changed, 8 insertions(+), 6 deletions(-) create mode 100644 changelog.d/13798.misc (limited to 'synapse') diff --git a/changelog.d/13798.misc b/changelog.d/13798.misc new file mode 100644 index 0000000000..e4ec2d77d6 --- /dev/null +++ b/changelog.d/13798.misc @@ -0,0 +1 @@ +Fix a memory leak when running the unit tests. 
\ No newline at end of file diff --git a/synapse/util/caches/__init__.py b/synapse/util/caches/__init__.py index 35c0be08b0..f7c3a6794e 100644 --- a/synapse/util/caches/__init__.py +++ b/synapse/util/caches/__init__.py @@ -205,8 +205,9 @@ def register_cache( add_resizable_cache(cache_name, resize_callback) metric = CacheMetric(cache, cache_type, cache_name, collect_callback) + metric_name = "cache_%s_%s" % (cache_type, cache_name) caches_by_name[cache_name] = cache - CACHE_METRIC_REGISTRY.register_hook(metric.collect) + CACHE_METRIC_REGISTRY.register_hook(metric_name, metric.collect) return metric diff --git a/synapse/util/metrics.py b/synapse/util/metrics.py index 9687120ebf..165480bdbe 100644 --- a/synapse/util/metrics.py +++ b/synapse/util/metrics.py @@ -15,7 +15,7 @@ import logging from functools import wraps from types import TracebackType -from typing import Awaitable, Callable, Generator, List, Optional, Type, TypeVar +from typing import Awaitable, Callable, Dict, Generator, Optional, Type, TypeVar from prometheus_client import CollectorRegistry, Counter, Metric from typing_extensions import Concatenate, ParamSpec, Protocol @@ -220,21 +220,21 @@ class DynamicCollectorRegistry(CollectorRegistry): def __init__(self) -> None: super().__init__() - self._pre_update_hooks: List[Callable[[], None]] = [] + self._pre_update_hooks: Dict[str, Callable[[], None]] = {} def collect(self) -> Generator[Metric, None, None]: """ Collects metrics, calling pre-update hooks first. """ - for pre_update_hook in self._pre_update_hooks: + for pre_update_hook in self._pre_update_hooks.values(): pre_update_hook() yield from super().collect() - def register_hook(self, hook: Callable[[], None]) -> None: + def register_hook(self, metric_name: str, hook: Callable[[], None]) -> None: """ Registers a hook that is called before metric collection. """ - self._pre_update_hooks.append(hook) + self._pre_update_hooks[metric_name] = hook -- cgit 1.5.1 From 6302753012927b63feddc71dd287e2d3554707d4 Mon Sep 17 00:00:00 2001 From: reivilibre Date: Wed, 14 Sep 2022 15:53:18 +0000 Subject: Deduplicate `is_server_notices_room`. (#13780) --- changelog.d/13780.misc | 1 + synapse/handlers/message.py | 10 +--------- synapse/handlers/room_member.py | 10 +--------- synapse/storage/databases/main/roommember.py | 17 +++++++++++++++++ 4 files changed, 20 insertions(+), 18 deletions(-) create mode 100644 changelog.d/13780.misc (limited to 'synapse') diff --git a/changelog.d/13780.misc b/changelog.d/13780.misc new file mode 100644 index 0000000000..1bcac51cad --- /dev/null +++ b/changelog.d/13780.misc @@ -0,0 +1 @@ +Deduplicate `is_server_notices_room`. 
\ No newline at end of file diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index 72157d5a36..e07cda133a 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -752,20 +752,12 @@ class EventCreationHandler: if builder.type == EventTypes.Member: membership = builder.content.get("membership", None) if membership == Membership.JOIN: - return await self._is_server_notices_room(builder.room_id) + return await self.store.is_server_notice_room(builder.room_id) elif membership == Membership.LEAVE: # the user is always allowed to leave (but not kick people) return builder.state_key == requester.user.to_string() return False - async def _is_server_notices_room(self, room_id: str) -> bool: - if self.config.servernotices.server_notices_mxid is None: - return False - is_server_notices_room = await self.store.check_local_user_in_room( - user_id=self.config.servernotices.server_notices_mxid, room_id=room_id - ) - return is_server_notices_room - async def assert_accepted_privacy_policy(self, requester: Requester) -> None: """Check if a user has accepted the privacy policy diff --git a/synapse/handlers/room_member.py b/synapse/handlers/room_member.py index 5d4adf5bfd..8d01f4bf2b 100644 --- a/synapse/handlers/room_member.py +++ b/synapse/handlers/room_member.py @@ -837,7 +837,7 @@ class RoomMemberHandler(metaclass=abc.ABCMeta): old_membership == Membership.INVITE and effective_membership_state == Membership.LEAVE ): - is_blocked = await self._is_server_notice_room(room_id) + is_blocked = await self.store.is_server_notice_room(room_id) if is_blocked: raise SynapseError( HTTPStatus.FORBIDDEN, @@ -1617,14 +1617,6 @@ class RoomMemberHandler(metaclass=abc.ABCMeta): return False - async def _is_server_notice_room(self, room_id: str) -> bool: - if self._server_notices_mxid is None: - return False - is_server_notices_room = await self.store.check_local_user_in_room( - user_id=self._server_notices_mxid, room_id=room_id - ) - return is_server_notices_room - class RoomMemberMasterHandler(RoomMemberHandler): def __init__(self, hs: "HomeServer"): diff --git a/synapse/storage/databases/main/roommember.py b/synapse/storage/databases/main/roommember.py index fdb4684e12..a8d224602a 100644 --- a/synapse/storage/databases/main/roommember.py +++ b/synapse/storage/databases/main/roommember.py @@ -88,6 +88,8 @@ class RoomMemberWorkerStore(EventsWorkerStore): # at a time. Keyed by room_id. self._joined_host_linearizer = Linearizer("_JoinedHostsCache") + self._server_notices_mxid = hs.config.servernotices.server_notices_mxid + if ( self.hs.config.worker.run_background_tasks and self.hs.config.metrics.metrics_flags.known_servers @@ -504,6 +506,21 @@ class RoomMemberWorkerStore(EventsWorkerStore): return membership == Membership.JOIN + async def is_server_notice_room(self, room_id: str) -> bool: + """ + Determines whether the given room is a 'Server Notices' room, used for + sending server notices to a user. + + This is determined by seeing whether the server notices user is present + in the room. 
+ """ + if self._server_notices_mxid is None: + return False + is_server_notices_room = await self.check_local_user_in_room( + user_id=self._server_notices_mxid, room_id=room_id + ) + return is_server_notices_room + async def get_local_current_membership_for_user_in_room( self, user_id: str, room_id: str ) -> Tuple[Optional[str], Optional[str]]: -- cgit 1.5.1 From f2d12ccabef17faa0bf6b34fbb6d944849afc4d4 Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Wed, 14 Sep 2022 12:01:42 -0400 Subject: Use partial indices on SQLIte. (#13802) Partial indices have been supported since SQLite 3.8, but Synapse now requires >= 3.27, so we can enable support for them. This requires rebuilding previous indices which were partial on PostgreSQL, but not on SQLite. --- changelog.d/13802.misc | 1 + synapse/storage/background_updates.py | 6 +-- .../storage/databases/main/event_push_actions.py | 1 - .../main/delta/72/09partial_indices.sql.sqlite | 56 ++++++++++++++++++++++ 4 files changed, 59 insertions(+), 5 deletions(-) create mode 100644 changelog.d/13802.misc create mode 100644 synapse/storage/schema/main/delta/72/09partial_indices.sql.sqlite (limited to 'synapse') diff --git a/changelog.d/13802.misc b/changelog.d/13802.misc new file mode 100644 index 0000000000..0d55071326 --- /dev/null +++ b/changelog.d/13802.misc @@ -0,0 +1 @@ +Use partial indices on SQLite. diff --git a/synapse/storage/background_updates.py b/synapse/storage/background_updates.py index 555b4e77d2..cf1eabc437 100644 --- a/synapse/storage/background_updates.py +++ b/synapse/storage/background_updates.py @@ -581,9 +581,6 @@ class BackgroundUpdater: def create_index_sqlite(conn: Connection) -> None: # Sqlite doesn't support concurrent creation of indexes. # - # We don't use partial indices on SQLite as it wasn't introduced - # until 3.8, and wheezy and CentOS 7 have 3.7 - # # We assume that sqlite doesn't give us invalid indices; however # we may still end up with the index existing but the # background_updates not having been recorded if synapse got shut @@ -591,12 +588,13 @@ class BackgroundUpdater: # has supported CREATE TABLE|INDEX IF NOT EXISTS since 3.3.0.) sql = ( "CREATE %(unique)s INDEX IF NOT EXISTS %(name)s ON %(table)s" - " (%(columns)s)" + " (%(columns)s) %(where_clause)s" ) % { "unique": "UNIQUE" if unique else "", "name": index_name, "table": table, "columns": ", ".join(columns), + "where_clause": "WHERE " + where_clause if where_clause else "", } c = conn.cursor() diff --git a/synapse/storage/databases/main/event_push_actions.py b/synapse/storage/databases/main/event_push_actions.py index f4a07de2a3..3a3fb8c507 100644 --- a/synapse/storage/databases/main/event_push_actions.py +++ b/synapse/storage/databases/main/event_push_actions.py @@ -1255,7 +1255,6 @@ class EventPushActionsStore(EventPushActionsWorkerStore): table="event_push_actions", columns=["highlight", "stream_ordering"], where_clause="highlight=0", - psql_only=True, ) async def get_push_actions_for_user( diff --git a/synapse/storage/schema/main/delta/72/09partial_indices.sql.sqlite b/synapse/storage/schema/main/delta/72/09partial_indices.sql.sqlite new file mode 100644 index 0000000000..c8dfdf0218 --- /dev/null +++ b/synapse/storage/schema/main/delta/72/09partial_indices.sql.sqlite @@ -0,0 +1,56 @@ +/* Copyright 2022 The Matrix.org Foundation C.I.C + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +-- SQLite needs to rebuild indices which use partial indices on Postgres, but +-- previously did not use them on SQLite. + +-- Drop each index that was added with register_background_index_update AND specified +-- a where_clause (that existed before this delta). + +-- From events_bg_updates.py +DROP INDEX IF EXISTS event_contains_url_index; +-- There is also a redactions_censored_redacts index, but that gets dropped. +DROP INDEX IF EXISTS redactions_have_censored_ts; +-- There is also a PostgreSQL only index (event_contains_url_index2) +-- which gets renamed to event_contains_url_index. + +-- From roommember.py +DROP INDEX IF EXISTS room_memberships_user_room_forgotten; + +-- From presence.py +DROP INDEX IF EXISTS presence_stream_state_not_offline_idx; + +-- From media_repository.py +DROP INDEX IF EXISTS local_media_repository_url_idx; + +-- From event_push_actions.py +DROP INDEX IF EXISTS event_push_actions_highlights_index; +-- There's also a event_push_actions_stream_highlight_index which was previously +-- PostgreSQL-only. + +-- From state.py +DROP INDEX IF EXISTS current_state_events_member_index; + +-- Re-insert the background jobs to re-create the indices. +INSERT INTO background_updates (ordering, update_name, progress_json, depends_on) VALUES + (7209, 'event_contains_url_index', '{}', NULL), + (7209, 'redactions_have_censored_ts_idx', '{}', NULL), + (7209, 'room_membership_forgotten_idx', '{}', NULL), + (7209, 'presence_stream_not_offline_index', '{}', NULL), + (7209, 'local_media_repository_url_idx', '{}', NULL), + (7209, 'event_push_actions_highlights_index', '{}', NULL), + (7209, 'event_push_actions_stream_highlight_index', '{}', NULL), + (7209, 'current_state_members_idx', '{}', NULL) +ON CONFLICT (update_name) DO NOTHING; -- cgit 1.5.1 From 666ae877292d4747b9441105e3df8558f7a335c0 Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Wed, 14 Sep 2022 13:11:16 -0400 Subject: Update event push action and receipt tables to support threads. (#13753) Adds a `thread_id` column to the `event_push_actions`, `event_push_actions_staging`, and `event_push_summary` tables. This will notifications to be segmented by the thread in a future pull request. The `thread_id` column stores the root event ID or the special value `"main"`. The `thread_id` column for `event_push_actions` and `event_push_summary` is backfilled with `"main"` for all existing rows. New entries into `event_push_actions` and `event_push_actions_staging` will get the proper thread ID. `receipts_linearized` and `receipts_graph` also gain a `thread_id` column, which is similar, except `NULL` is a special value meaning the receipt is "unthreaded". See MSC3771 and MSC3773 for where this data will be useful. 
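A rough sketch of the convention just described, assuming `relation` mirrors the object `relation_from_event` returns in the diff below (illustrative only, not code from this patch):

```python
# Illustrative sketch of the thread_id convention: push actions are keyed by
# the thread root's event ID, or the literal "main" for unthreaded events;
# receipts instead use NULL (None) to mean "unthreaded".
from dataclasses import dataclass
from typing import Optional


@dataclass
class Relation:
    # Stand-in for the relation object used in bulk_push_rule_evaluator.py below.
    rel_type: str
    parent_id: str


MAIN_TIMELINE = "main"  # special thread_id stored for unthreaded notifications


def push_action_thread_id(relation: Optional[Relation]) -> str:
    """thread_id written to event_push_actions(_staging): root event ID or "main"."""
    if relation is not None and relation.rel_type == "m.thread":
        # "m.thread" is the thread relation type (RelationTypes.THREAD).
        return relation.parent_id
    return MAIN_TIMELINE
```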
--- changelog.d/13753.misc | 1 + synapse/push/bulk_push_rule_evaluator.py | 29 ++--- .../storage/databases/main/event_push_actions.py | 121 ++++++++++++++++++++- synapse/storage/databases/main/events.py | 4 +- synapse/storage/databases/main/receipts.py | 20 ++++ synapse/storage/schema/__init__.py | 6 +- .../main/delta/72/06thread_notifications.sql | 30 +++++ .../main/delta/72/07thread_receipts.sql.postgres | 30 +++++ .../main/delta/72/07thread_receipts.sql.sqlite | 70 ++++++++++++ .../schema/main/delta/72/08thread_receipts.sql | 20 ++++ tests/replication/slave/storage/test_events.py | 1 + 11 files changed, 312 insertions(+), 20 deletions(-) create mode 100644 changelog.d/13753.misc create mode 100644 synapse/storage/schema/main/delta/72/06thread_notifications.sql create mode 100644 synapse/storage/schema/main/delta/72/07thread_receipts.sql.postgres create mode 100644 synapse/storage/schema/main/delta/72/07thread_receipts.sql.sqlite create mode 100644 synapse/storage/schema/main/delta/72/08thread_receipts.sql (limited to 'synapse') diff --git a/changelog.d/13753.misc b/changelog.d/13753.misc new file mode 100644 index 0000000000..63de2eb9f9 --- /dev/null +++ b/changelog.d/13753.misc @@ -0,0 +1 @@ +Prepatory work for storing thread IDs for notifications and receipts. diff --git a/synapse/push/bulk_push_rule_evaluator.py b/synapse/push/bulk_push_rule_evaluator.py index d1caf8a0f7..3846fbc5f0 100644 --- a/synapse/push/bulk_push_rule_evaluator.py +++ b/synapse/push/bulk_push_rule_evaluator.py @@ -198,7 +198,7 @@ class BulkPushRuleEvaluator: return pl_event.content if pl_event else {}, sender_level async def _get_mutual_relations( - self, event: EventBase, rules: Iterable[Tuple[PushRule, bool]] + self, parent_id: str, rules: Iterable[Tuple[PushRule, bool]] ) -> Dict[str, Set[Tuple[str, str]]]: """ Fetch event metadata for events which related to the same event as the given event. @@ -206,7 +206,7 @@ class BulkPushRuleEvaluator: If the given event has no relation information, returns an empty dictionary. Args: - event_id: The event ID which is targeted by relations. + parent_id: The event ID which is targeted by relations. rules: The push rules which will be processed for this event. Returns: @@ -220,12 +220,6 @@ class BulkPushRuleEvaluator: if not self._relations_match_enabled: return {} - # If the event does not have a relation, then cannot have any mutual - # relations. - relation = relation_from_event(event) - if not relation: - return {} - # Pre-filter to figure out which relation types are interesting. rel_types = set() for rule, enabled in rules: @@ -246,9 +240,7 @@ class BulkPushRuleEvaluator: return {} # If any valid rules were found, fetch the mutual relations. - return await self.store.get_mutual_event_relations( - relation.parent_id, rel_types - ) + return await self.store.get_mutual_event_relations(parent_id, rel_types) @measure_func("action_for_event_by_user") async def action_for_event_by_user( @@ -281,9 +273,17 @@ class BulkPushRuleEvaluator: sender_power_level, ) = await self._get_power_levels_and_sender_level(event, context) - relations = await self._get_mutual_relations( - event, itertools.chain(*rules_by_user.values()) - ) + relation = relation_from_event(event) + # If the event does not have a relation, then cannot have any mutual + # relations or thread ID. 
+ relations = {} + thread_id = "main" + if relation: + relations = await self._get_mutual_relations( + relation.parent_id, itertools.chain(*rules_by_user.values()) + ) + if relation.rel_type == RelationTypes.THREAD: + thread_id = relation.parent_id evaluator = PushRuleEvaluatorForEvent( event, @@ -352,6 +352,7 @@ class BulkPushRuleEvaluator: event.event_id, actions_by_user, count_as_unread, + thread_id, ) diff --git a/synapse/storage/databases/main/event_push_actions.py b/synapse/storage/databases/main/event_push_actions.py index 3a3fb8c507..6b8668d2dc 100644 --- a/synapse/storage/databases/main/event_push_actions.py +++ b/synapse/storage/databases/main/event_push_actions.py @@ -98,6 +98,7 @@ from synapse.storage.database import ( ) from synapse.storage.databases.main.receipts import ReceiptsWorkerStore from synapse.storage.databases.main.stream import StreamWorkerStore +from synapse.types import JsonDict from synapse.util import json_encoder from synapse.util.caches.descriptors import cached @@ -232,6 +233,104 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas replaces_index="event_push_summary_user_rm", ) + self.db_pool.updates.register_background_index_update( + "event_push_summary_unique_index2", + index_name="event_push_summary_unique_index2", + table="event_push_summary", + columns=["user_id", "room_id", "thread_id"], + unique=True, + ) + + self.db_pool.updates.register_background_update_handler( + "event_push_backfill_thread_id", + self._background_backfill_thread_id, + ) + + async def _background_backfill_thread_id( + self, progress: JsonDict, batch_size: int + ) -> int: + """ + Fill in the thread_id field for event_push_actions and event_push_summary. + + This is preparatory so that it can be made non-nullable in the future. + + Because all current (null) data is done in an unthreaded manner this + simply assumes it is on the "main" timeline. Since event_push_actions + are periodically cleared it is not possible to correctly re-calculate + the thread_id. + """ + event_push_actions_done = progress.get("event_push_actions_done", False) + + def add_thread_id_txn( + txn: LoggingTransaction, table_name: str, start_stream_ordering: int + ) -> int: + sql = f""" + SELECT stream_ordering + FROM {table_name} + WHERE + thread_id IS NULL + AND stream_ordering > ? + ORDER BY stream_ordering + LIMIT ? + """ + txn.execute(sql, (start_stream_ordering, batch_size)) + + # No more rows to process. + rows = txn.fetchall() + if not rows: + progress[f"{table_name}_done"] = True + self.db_pool.updates._background_update_progress_txn( + txn, "event_push_backfill_thread_id", progress + ) + return 0 + + # Update the thread ID for any of those rows. + max_stream_ordering = rows[-1][0] + + sql = f""" + UPDATE {table_name} + SET thread_id = 'main' + WHERE stream_ordering <= ? AND thread_id IS NULL + """ + txn.execute(sql, (max_stream_ordering,)) + + # Update progress. + processed_rows = txn.rowcount + progress[f"max_{table_name}_stream_ordering"] = max_stream_ordering + self.db_pool.updates._background_update_progress_txn( + txn, "event_push_backfill_thread_id", progress + ) + + return processed_rows + + # First update the event_push_actions table, then the event_push_summary table. + # + # Note that the event_push_actions_staging table is ignored since it is + # assumed that items in that table will only exist for a short period of + # time. 
+ if not event_push_actions_done: + result = await self.db_pool.runInteraction( + "event_push_backfill_thread_id", + add_thread_id_txn, + "event_push_actions", + progress.get("max_event_push_actions_stream_ordering", 0), + ) + else: + result = await self.db_pool.runInteraction( + "event_push_backfill_thread_id", + add_thread_id_txn, + "event_push_summary", + progress.get("max_event_push_summary_stream_ordering", 0), + ) + + # Only done after the event_push_summary table is done. + if not result: + await self.db_pool.updates._end_background_update( + "event_push_backfill_thread_id" + ) + + return result + @cached(tree=True, max_entries=5000) async def get_unread_event_push_actions_by_room_for_user( self, @@ -670,6 +769,7 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas event_id: str, user_id_actions: Dict[str, Collection[Union[Mapping, str]]], count_as_unread: bool, + thread_id: str, ) -> None: """Add the push actions for the event to the push action staging area. @@ -678,6 +778,7 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas user_id_actions: A mapping of user_id to list of push actions, where an action can either be a string or dict. count_as_unread: Whether this event should increment unread counts. + thread_id: The thread this event is parent of, if applicable. """ if not user_id_actions: return @@ -686,7 +787,7 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas # can be used to insert into the `event_push_actions_staging` table. def _gen_entry( user_id: str, actions: Collection[Union[Mapping, str]] - ) -> Tuple[str, str, str, int, int, int]: + ) -> Tuple[str, str, str, int, int, int, str]: is_highlight = 1 if _action_has_highlight(actions) else 0 notif = 1 if "notify" in actions else 0 return ( @@ -696,11 +797,20 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas notif, # notif column is_highlight, # highlight column int(count_as_unread), # unread column + thread_id, # thread_id column ) await self.db_pool.simple_insert_many( "event_push_actions_staging", - keys=("event_id", "user_id", "actions", "notif", "highlight", "unread"), + keys=( + "event_id", + "user_id", + "actions", + "notif", + "highlight", + "unread", + "thread_id", + ), values=[ _gen_entry(user_id, actions) for user_id, actions in user_id_actions.items() @@ -981,6 +1091,8 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas ) # Replace the previous summary with the new counts. + # + # TODO(threads): Upsert per-thread instead of setting them all to main. self.db_pool.simple_upsert_txn( txn, table="event_push_summary", @@ -990,6 +1102,7 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas "unread_count": unread_count, "stream_ordering": old_rotate_stream_ordering, "last_receipt_stream_ordering": stream_ordering, + "thread_id": "main", }, ) @@ -1138,17 +1251,19 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas logger.info("Rotating notifications, handling %d rows", len(summaries)) + # TODO(threads): Update on a per-thread basis. 
self.db_pool.simple_upsert_many_txn( txn, table="event_push_summary", key_names=("user_id", "room_id"), key_values=[(user_id, room_id) for user_id, room_id in summaries], - value_names=("notif_count", "unread_count", "stream_ordering"), + value_names=("notif_count", "unread_count", "stream_ordering", "thread_id"), value_values=[ ( summary.notif_count, summary.unread_count, summary.stream_ordering, + "main", ) for summary in summaries.values() ], diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index a4010ee28d..c0b4080e4b 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -2192,9 +2192,9 @@ class PersistEventsStore: sql = """ INSERT INTO event_push_actions ( room_id, event_id, user_id, actions, stream_ordering, - topological_ordering, notif, highlight, unread + topological_ordering, notif, highlight, unread, thread_id ) - SELECT ?, event_id, user_id, actions, ?, ?, notif, highlight, unread + SELECT ?, event_id, user_id, actions, ?, ?, notif, highlight, unread, thread_id FROM event_push_actions_staging WHERE event_id = ? """ diff --git a/synapse/storage/databases/main/receipts.py b/synapse/storage/databases/main/receipts.py index 719a12b0ae..ddb8e80b69 100644 --- a/synapse/storage/databases/main/receipts.py +++ b/synapse/storage/databases/main/receipts.py @@ -113,6 +113,24 @@ class ReceiptsWorkerStore(SQLBaseStore): prefilled_cache=receipts_stream_prefill, ) + self.db_pool.updates.register_background_index_update( + "receipts_linearized_unique_index", + index_name="receipts_linearized_unique_index", + table="receipts_linearized", + columns=["room_id", "receipt_type", "user_id"], + where_clause="thread_id IS NULL", + unique=True, + ) + + self.db_pool.updates.register_background_index_update( + "receipts_graph_unique_index", + index_name="receipts_graph_unique_index", + table="receipts_graph", + columns=["room_id", "receipt_type", "user_id"], + where_clause="thread_id IS NULL", + unique=True, + ) + def get_max_receipt_stream_id(self) -> int: """Get the current max stream ID for receipts stream""" return self._receipts_id_gen.get_current_token() @@ -677,6 +695,7 @@ class ReceiptsWorkerStore(SQLBaseStore): "event_id": event_id, "event_stream_ordering": stream_ordering, "data": json_encoder.encode(data), + "thread_id": None, }, # receipts_linearized has a unique constraint on # (user_id, room_id, receipt_type), so no need to lock @@ -824,6 +843,7 @@ class ReceiptsWorkerStore(SQLBaseStore): values={ "event_ids": json_encoder.encode(event_ids), "data": json_encoder.encode(data), + "thread_id": None, }, # receipts_graph has a unique constraint on # (user_id, room_id, receipt_type), so no need to lock diff --git a/synapse/storage/schema/__init__.py b/synapse/storage/schema/__init__.py index 32cda5e3ba..38c9532bfd 100644 --- a/synapse/storage/schema/__init__.py +++ b/synapse/storage/schema/__init__.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -SCHEMA_VERSION = 72 # remember to update the list below when updating +SCHEMA_VERSION = 73 # remember to update the list below when updating """Represents the expectations made by the codebase about the database schema This should be incremented whenever the codebase changes its requirements on the @@ -77,6 +77,10 @@ Changes in SCHEMA_VERSION = 72: - Tables related to groups are dropped. 
- Unused column application_services_state.last_txn is dropped - Cache invalidation stream id sequence now begins at 2 to match code expectation. + +Changes in SCHEMA_VERSION = 73; + - thread_id column is added to event_push_actions, event_push_actions_staging + event_push_summary, receipts_linearized, and receipts_graph. """ diff --git a/synapse/storage/schema/main/delta/72/06thread_notifications.sql b/synapse/storage/schema/main/delta/72/06thread_notifications.sql new file mode 100644 index 0000000000..2f4f5dac7a --- /dev/null +++ b/synapse/storage/schema/main/delta/72/06thread_notifications.sql @@ -0,0 +1,30 @@ +/* Copyright 2022 The Matrix.org Foundation C.I.C + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +-- Add a nullable column for thread ID to the event push actions tables; this +-- will be filled in with a default value for any previously existing rows. +-- +-- After migration this can be made non-nullable. + +ALTER TABLE event_push_actions_staging ADD COLUMN thread_id TEXT; +ALTER TABLE event_push_actions ADD COLUMN thread_id TEXT; +ALTER TABLE event_push_summary ADD COLUMN thread_id TEXT; + +-- Update the unique index for `event_push_summary`. +INSERT INTO background_updates (ordering, update_name, progress_json) VALUES + (7006, 'event_push_summary_unique_index2', '{}'); + +INSERT INTO background_updates (ordering, update_name, progress_json, depends_on) VALUES + (7006, 'event_push_backfill_thread_id', '{}', 'event_push_summary_unique_index2'); diff --git a/synapse/storage/schema/main/delta/72/07thread_receipts.sql.postgres b/synapse/storage/schema/main/delta/72/07thread_receipts.sql.postgres new file mode 100644 index 0000000000..55fff9e278 --- /dev/null +++ b/synapse/storage/schema/main/delta/72/07thread_receipts.sql.postgres @@ -0,0 +1,30 @@ +/* Copyright 2022 The Matrix.org Foundation C.I.C + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +-- Add a nullable column for thread ID to the receipts table; this allows a +-- receipt per user, per room, as well as an unthreaded receipt (corresponding +-- to a null thread ID). + +ALTER TABLE receipts_linearized ADD COLUMN thread_id TEXT; +ALTER TABLE receipts_graph ADD COLUMN thread_id TEXT; + +-- Rebuild the unique constraint with the thread_id. 
+ALTER TABLE receipts_linearized + ADD CONSTRAINT receipts_linearized_uniqueness_thread + UNIQUE (room_id, receipt_type, user_id, thread_id); + +ALTER TABLE receipts_graph + ADD CONSTRAINT receipts_graph_uniqueness_thread + UNIQUE (room_id, receipt_type, user_id, thread_id); diff --git a/synapse/storage/schema/main/delta/72/07thread_receipts.sql.sqlite b/synapse/storage/schema/main/delta/72/07thread_receipts.sql.sqlite new file mode 100644 index 0000000000..232f67deb4 --- /dev/null +++ b/synapse/storage/schema/main/delta/72/07thread_receipts.sql.sqlite @@ -0,0 +1,70 @@ +/* Copyright 2022 The Matrix.org Foundation C.I.C + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +-- Allow multiple receipts per user per room via a nullable thread_id column. +-- +-- SQLite doesn't support modifying constraints to an existing table, so it must +-- be recreated. + +-- Create the new tables. +CREATE TABLE receipts_linearized_new ( + stream_id BIGINT NOT NULL, + room_id TEXT NOT NULL, + receipt_type TEXT NOT NULL, + user_id TEXT NOT NULL, + event_id TEXT NOT NULL, + thread_id TEXT, + event_stream_ordering BIGINT, + data TEXT NOT NULL, + CONSTRAINT receipts_linearized_uniqueness UNIQUE (room_id, receipt_type, user_id), + CONSTRAINT receipts_linearized_uniqueness_thread UNIQUE (room_id, receipt_type, user_id, thread_id) +); + +CREATE TABLE receipts_graph_new ( + room_id TEXT NOT NULL, + receipt_type TEXT NOT NULL, + user_id TEXT NOT NULL, + event_ids TEXT NOT NULL, + thread_id TEXT, + data TEXT NOT NULL, + CONSTRAINT receipts_graph_uniqueness UNIQUE (room_id, receipt_type, user_id), + CONSTRAINT receipts_graph_uniqueness_thread UNIQUE (room_id, receipt_type, user_id, thread_id) +); + +-- Drop the old indexes. +DROP INDEX IF EXISTS receipts_linearized_id; +DROP INDEX IF EXISTS receipts_linearized_room_stream; +DROP INDEX IF EXISTS receipts_linearized_user; + +-- Copy the data. +INSERT INTO receipts_linearized_new (stream_id, room_id, receipt_type, user_id, event_id, event_stream_ordering, data) + SELECT stream_id, room_id, receipt_type, user_id, event_id, event_stream_ordering, data + FROM receipts_linearized; +INSERT INTO receipts_graph_new (room_id, receipt_type, user_id, event_ids, data) + SELECT room_id, receipt_type, user_id, event_ids, data + FROM receipts_graph; + +-- Drop the old tables. +DROP TABLE receipts_linearized; +DROP TABLE receipts_graph; + +-- Rename the tables. +ALTER TABLE receipts_linearized_new RENAME TO receipts_linearized; +ALTER TABLE receipts_graph_new RENAME TO receipts_graph; + +-- Create the indices. 
+CREATE INDEX receipts_linearized_id ON receipts_linearized( stream_id ); +CREATE INDEX receipts_linearized_room_stream ON receipts_linearized( room_id, stream_id ); +CREATE INDEX receipts_linearized_user ON receipts_linearized( user_id ); diff --git a/synapse/storage/schema/main/delta/72/08thread_receipts.sql b/synapse/storage/schema/main/delta/72/08thread_receipts.sql new file mode 100644 index 0000000000..e35b021f31 --- /dev/null +++ b/synapse/storage/schema/main/delta/72/08thread_receipts.sql @@ -0,0 +1,20 @@ +/* Copyright 2022 The Matrix.org Foundation C.I.C + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +INSERT INTO background_updates (ordering, update_name, progress_json) VALUES + (7007, 'receipts_linearized_unique_index', '{}'); + +INSERT INTO background_updates (ordering, update_name, progress_json) VALUES + (7007, 'receipts_graph_unique_index', '{}'); diff --git a/tests/replication/slave/storage/test_events.py b/tests/replication/slave/storage/test_events.py index 531a0db2d0..49a21e2e85 100644 --- a/tests/replication/slave/storage/test_events.py +++ b/tests/replication/slave/storage/test_events.py @@ -404,6 +404,7 @@ class SlavedEventStoreTestCase(BaseSlavedStoreTestCase): event.event_id, {user_id: actions for user_id, actions in push_actions}, False, + "main", ) ) return event, context -- cgit 1.5.1 From 957e3d74fc70f92bb9ed3c709f87752bf77a8c90 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 14 Sep 2022 13:57:50 -0500 Subject: Keep track when we try and fail to process a pulled event (#13589) We can follow-up this PR with: 1. Only try to backfill from an event if we haven't tried recently -> https://github.com/matrix-org/synapse/issues/13622 1. When we decide to backfill that event again, process it in the background so it doesn't block and make `/messages` slow when we know it will probably fail again -> https://github.com/matrix-org/synapse/issues/13623 1. 
Generally track failures everywhere we try and fail to pull an event over federation -> https://github.com/matrix-org/synapse/issues/13700 Fix https://github.com/matrix-org/synapse/issues/13621 Part of https://github.com/matrix-org/synapse/issues/13356 Mentioned in [internal doc](https://docs.google.com/document/d/1lvUoVfYUiy6UaHB6Rb4HicjaJAU40-APue9Q4vzuW3c/edit#bookmark=id.qv7cj51sv9i5) --- changelog.d/13589.feature | 1 + synapse/handlers/federation_event.py | 7 + synapse/storage/databases/main/event_federation.py | 45 +++++ synapse/storage/databases/main/events.py | 32 ++- synapse/storage/schema/__init__.py | 2 + .../main/delta/73/01event_failed_pull_attempts.sql | 29 +++ tests/handlers/test_federation_event.py | 222 +++++++++++++++++++++ 7 files changed, 329 insertions(+), 9 deletions(-) create mode 100644 changelog.d/13589.feature create mode 100644 synapse/storage/schema/main/delta/73/01event_failed_pull_attempts.sql (limited to 'synapse') diff --git a/changelog.d/13589.feature b/changelog.d/13589.feature new file mode 100644 index 0000000000..78fa1ddb52 --- /dev/null +++ b/changelog.d/13589.feature @@ -0,0 +1 @@ +Keep track when we attempt to backfill an event but fail so we can intelligently back-off in the future. diff --git a/synapse/handlers/federation_event.py b/synapse/handlers/federation_event.py index ace7adcffb..9e065e1116 100644 --- a/synapse/handlers/federation_event.py +++ b/synapse/handlers/federation_event.py @@ -862,6 +862,9 @@ class FederationEventHandler: self._sanity_check_event(event) except SynapseError as err: logger.warning("Event %s failed sanity check: %s", event_id, err) + await self._store.record_event_failed_pull_attempt( + event.room_id, event_id, str(err) + ) return try: @@ -897,6 +900,10 @@ class FederationEventHandler: backfilled=backfilled, ) except FederationError as e: + await self._store.record_event_failed_pull_attempt( + event.room_id, event_id, str(e) + ) + if e.code == 403: logger.warning("Pulled event %s failed history check.", event_id) else: diff --git a/synapse/storage/databases/main/event_federation.py b/synapse/storage/databases/main/event_federation.py index ca47a22bf1..ef477978ed 100644 --- a/synapse/storage/databases/main/event_federation.py +++ b/synapse/storage/databases/main/event_federation.py @@ -1294,6 +1294,51 @@ class EventFederationWorkerStore(SignatureWorkerStore, EventsWorkerStore, SQLBas return event_id_results + @trace + async def record_event_failed_pull_attempt( + self, room_id: str, event_id: str, cause: str + ) -> None: + """ + Record when we fail to pull an event over federation. + + This information allows us to be more intelligent when we decide to + retry (we don't need to fail over and over) and we can process that + event in the background so we don't block on it each time. + + Args: + room_id: The room where the event failed to pull from + event_id: The event that failed to be fetched or processed + cause: The error message or reason that we failed to pull the event + """ + await self.db_pool.runInteraction( + "record_event_failed_pull_attempt", + self._record_event_failed_pull_attempt_upsert_txn, + room_id, + event_id, + cause, + db_autocommit=True, # Safe as it's a single upsert + ) + + def _record_event_failed_pull_attempt_upsert_txn( + self, + txn: LoggingTransaction, + room_id: str, + event_id: str, + cause: str, + ) -> None: + sql = """ + INSERT INTO event_failed_pull_attempts ( + room_id, event_id, num_attempts, last_attempt_ts, last_cause + ) + VALUES (?, ?, ?, ?, ?) 
+ ON CONFLICT (room_id, event_id) DO UPDATE SET + num_attempts=event_failed_pull_attempts.num_attempts + 1, + last_attempt_ts=EXCLUDED.last_attempt_ts, + last_cause=EXCLUDED.last_cause; + """ + + txn.execute(sql, (room_id, event_id, 1, self._clock.time_msec(), cause)) + async def get_missing_events( self, room_id: str, diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index c0b4080e4b..1b54a2eb57 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -2435,17 +2435,31 @@ class PersistEventsStore: "DELETE FROM event_backward_extremities" " WHERE event_id = ? AND room_id = ?" ) + backward_extremity_tuples_to_remove = [ + (ev.event_id, ev.room_id) + for ev in events + if not ev.internal_metadata.is_outlier() + # If we encountered an event with no prev_events, then we might + # as well remove it now because it won't ever have anything else + # to backfill from. + or len(ev.prev_event_ids()) == 0 + ] txn.execute_batch( query, - [ - (ev.event_id, ev.room_id) - for ev in events - if not ev.internal_metadata.is_outlier() - # If we encountered an event with no prev_events, then we might - # as well remove it now because it won't ever have anything else - # to backfill from. - or len(ev.prev_event_ids()) == 0 - ], + backward_extremity_tuples_to_remove, + ) + + # Clear out the failed backfill attempts after we successfully pulled + # the event. Since we no longer need these events as backward + # extremities, it also means that they won't be backfilled from again so + # we no longer need to store the backfill attempts around it. + query = """ + DELETE FROM event_failed_pull_attempts + WHERE event_id = ? and room_id = ? + """ + txn.execute_batch( + query, + backward_extremity_tuples_to_remove, ) diff --git a/synapse/storage/schema/__init__.py b/synapse/storage/schema/__init__.py index 38c9532bfd..68e055c664 100644 --- a/synapse/storage/schema/__init__.py +++ b/synapse/storage/schema/__init__.py @@ -81,6 +81,8 @@ Changes in SCHEMA_VERSION = 72: Changes in SCHEMA_VERSION = 73; - thread_id column is added to event_push_actions, event_push_actions_staging event_push_summary, receipts_linearized, and receipts_graph. + - Add table `event_failed_pull_attempts` to keep track when we fail to pull + events over federation. """ diff --git a/synapse/storage/schema/main/delta/73/01event_failed_pull_attempts.sql b/synapse/storage/schema/main/delta/73/01event_failed_pull_attempts.sql new file mode 100644 index 0000000000..d397ee1082 --- /dev/null +++ b/synapse/storage/schema/main/delta/73/01event_failed_pull_attempts.sql @@ -0,0 +1,29 @@ +/* Copyright 2022 The Matrix.org Foundation C.I.C + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +-- Add a table that keeps track of when we failed to pull an event over +-- federation (via /backfill, `/event`, `/get_missing_events`, etc). 
This allows +-- us to be more intelligent when we decide to retry (we don't need to fail over +-- and over) and we can process that event in the background so we don't block +-- on it each time. +CREATE TABLE IF NOT EXISTS event_failed_pull_attempts( + room_id TEXT NOT NULL REFERENCES rooms (room_id), + event_id TEXT NOT NULL, + num_attempts INT NOT NULL, + last_attempt_ts BIGINT NOT NULL, + last_cause TEXT NOT NULL, + PRIMARY KEY (room_id, event_id) +); diff --git a/tests/handlers/test_federation_event.py b/tests/handlers/test_federation_event.py index 51c8dd6498..b5b89405a4 100644 --- a/tests/handlers/test_federation_event.py +++ b/tests/handlers/test_federation_event.py @@ -227,3 +227,225 @@ class FederationEventHandlerTests(unittest.FederatingHomeserverTestCase): if prev_exists_as_outlier: self.mock_federation_transport_client.get_event.assert_not_called() + + def test_process_pulled_event_records_failed_backfill_attempts( + self, + ) -> None: + """ + Test to make sure that failed backfill attempts for an event are + recorded in the `event_failed_pull_attempts` table. + + In this test, we pretend we are processing a "pulled" event via + backfill. The pulled event has a fake `prev_event` which our server has + obviously never seen before so it attempts to request the state at that + `prev_event` which expectedly fails because it's a fake event. Because + the server can't fetch the state at the missing `prev_event`, the + "pulled" event fails the history check and is fails to process. + + We check that we correctly record the number of failed pull attempts + of the pulled event and as a sanity check, that the "pulled" event isn't + persisted. + """ + OTHER_USER = f"@user:{self.OTHER_SERVER_NAME}" + main_store = self.hs.get_datastores().main + + # Create the room + user_id = self.register_user("kermit", "test") + tok = self.login("kermit", "test") + room_id = self.helper.create_room_as(room_creator=user_id, tok=tok) + room_version = self.get_success(main_store.get_room_version(room_id)) + + # We expect an outbound request to /state_ids, so stub that out + self.mock_federation_transport_client.get_room_state_ids.return_value = make_awaitable( + { + # Mimic the other server not knowing about the state at all. + # We want to cause Synapse to throw an error (`Unable to get + # missing prev_event $fake_prev_event`) and fail to backfill + # the pulled event. + "pdu_ids": [], + "auth_chain_ids": [], + } + ) + # We also expect an outbound request to /state + self.mock_federation_transport_client.get_room_state.return_value = make_awaitable( + StateRequestResponse( + # Mimic the other server not knowing about the state at all. + # We want to cause Synapse to throw an error (`Unable to get + # missing prev_event $fake_prev_event`) and fail to backfill + # the pulled event. 
+ auth_events=[], + state=[], + ) + ) + + pulled_event = make_event_from_dict( + self.add_hashes_and_signatures_from_other_server( + { + "type": "test_regular_type", + "room_id": room_id, + "sender": OTHER_USER, + "prev_events": [ + # The fake prev event will make the pulled event fail + # the history check (`Unable to get missing prev_event + # $fake_prev_event`) + "$fake_prev_event" + ], + "auth_events": [], + "origin_server_ts": 1, + "depth": 12, + "content": {"body": "pulled"}, + } + ), + room_version, + ) + + # The function under test: try to process the pulled event + with LoggingContext("test"): + self.get_success( + self.hs.get_federation_event_handler()._process_pulled_event( + self.OTHER_SERVER_NAME, pulled_event, backfilled=True + ) + ) + + # Make sure our failed pull attempt was recorded + backfill_num_attempts = self.get_success( + main_store.db_pool.simple_select_one_onecol( + table="event_failed_pull_attempts", + keyvalues={"event_id": pulled_event.event_id}, + retcol="num_attempts", + ) + ) + self.assertEqual(backfill_num_attempts, 1) + + # The function under test: try to process the pulled event again + with LoggingContext("test"): + self.get_success( + self.hs.get_federation_event_handler()._process_pulled_event( + self.OTHER_SERVER_NAME, pulled_event, backfilled=True + ) + ) + + # Make sure our second failed pull attempt was recorded (`num_attempts` was incremented) + backfill_num_attempts = self.get_success( + main_store.db_pool.simple_select_one_onecol( + table="event_failed_pull_attempts", + keyvalues={"event_id": pulled_event.event_id}, + retcol="num_attempts", + ) + ) + self.assertEqual(backfill_num_attempts, 2) + + # And as a sanity check, make sure the event was not persisted through all of this. + persisted = self.get_success( + main_store.get_event(pulled_event.event_id, allow_none=True) + ) + self.assertIsNone( + persisted, + "pulled event that fails the history check should not be persisted at all", + ) + + def test_process_pulled_event_clears_backfill_attempts_after_being_successfully_persisted( + self, + ) -> None: + """ + Test to make sure that failed pull attempts + (`event_failed_pull_attempts` table) for an event are cleared after the + event is successfully persisted. + + In this test, we pretend we are processing a "pulled" event via + backfill. The pulled event succesfully processes and the backward + extremeties are updated along with clearing out any failed pull attempts + for those old extremities. + + We check that we correctly cleared failed pull attempts of the + pulled event. 
+ """ + OTHER_USER = f"@user:{self.OTHER_SERVER_NAME}" + main_store = self.hs.get_datastores().main + + # Create the room + user_id = self.register_user("kermit", "test") + tok = self.login("kermit", "test") + room_id = self.helper.create_room_as(room_creator=user_id, tok=tok) + room_version = self.get_success(main_store.get_room_version(room_id)) + + # allow the remote user to send state events + self.helper.send_state( + room_id, + "m.room.power_levels", + {"events_default": 0, "state_default": 0}, + tok=tok, + ) + + # add the remote user to the room + member_event = self.get_success( + event_injection.inject_member_event(self.hs, room_id, OTHER_USER, "join") + ) + + initial_state_map = self.get_success( + main_store.get_partial_current_state_ids(room_id) + ) + + auth_event_ids = [ + initial_state_map[("m.room.create", "")], + initial_state_map[("m.room.power_levels", "")], + member_event.event_id, + ] + + pulled_event = make_event_from_dict( + self.add_hashes_and_signatures_from_other_server( + { + "type": "test_regular_type", + "room_id": room_id, + "sender": OTHER_USER, + "prev_events": [member_event.event_id], + "auth_events": auth_event_ids, + "origin_server_ts": 1, + "depth": 12, + "content": {"body": "pulled"}, + } + ), + room_version, + ) + + # Fake the "pulled" event failing to backfill once so we can test + # if it's cleared out later on. + self.get_success( + main_store.record_event_failed_pull_attempt( + pulled_event.room_id, pulled_event.event_id, "fake cause" + ) + ) + # Make sure we have a failed pull attempt recorded for the pulled event + backfill_num_attempts = self.get_success( + main_store.db_pool.simple_select_one_onecol( + table="event_failed_pull_attempts", + keyvalues={"event_id": pulled_event.event_id}, + retcol="num_attempts", + ) + ) + self.assertEqual(backfill_num_attempts, 1) + + # The function under test: try to process the pulled event + with LoggingContext("test"): + self.get_success( + self.hs.get_federation_event_handler()._process_pulled_event( + self.OTHER_SERVER_NAME, pulled_event, backfilled=True + ) + ) + + # Make sure the failed pull attempts for the pulled event are cleared + backfill_num_attempts = self.get_success( + main_store.db_pool.simple_select_one_onecol( + table="event_failed_pull_attempts", + keyvalues={"event_id": pulled_event.event_id}, + retcol="num_attempts", + allow_none=True, + ) + ) + self.assertIsNone(backfill_num_attempts) + + # And as a sanity check, make sure the "pulled" event was persisted. + persisted = self.get_success( + main_store.get_event(pulled_event.event_id, allow_none=True) + ) + self.assertIsNotNone(persisted, "pulled event was not persisted at all") -- cgit 1.5.1 From 918c74bfb57e3ca4d300ed9a3bfb99b99126f821 Mon Sep 17 00:00:00 2001 From: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com> Date: Thu, 15 Sep 2022 13:57:16 +0100 Subject: Add a `MXCUri` class to make working with mxc uri's easier. 
(#13162) --- changelog.d/13162.misc | 1 + poetry.lock | 10 +-- pyproject.toml | 2 +- synapse/rest/media/v1/media_repository.py | 6 +- synapse/rest/media/v1/upload_resource.py | 6 +- tests/rest/media/test_media_retention.py | 102 +++++++++++------------------- 6 files changed, 53 insertions(+), 74 deletions(-) create mode 100644 changelog.d/13162.misc (limited to 'synapse') diff --git a/changelog.d/13162.misc b/changelog.d/13162.misc new file mode 100644 index 0000000000..b0d7c05e74 --- /dev/null +++ b/changelog.d/13162.misc @@ -0,0 +1 @@ +Bump the minimum dependency of `matrix_common` to 1.3.0 to make use of the `MXCUri` class. Use `MXCUri` to simplify media retention test code. \ No newline at end of file diff --git a/poetry.lock b/poetry.lock index cdc69f8ea9..291f3c51e6 100644 --- a/poetry.lock +++ b/poetry.lock @@ -524,11 +524,11 @@ python-versions = ">=3.7" [[package]] name = "matrix-common" -version = "1.2.1" +version = "1.3.0" description = "Common utilities for Synapse, Sydent and Sygnal" category = "main" optional = false -python-versions = ">=3.6" +python-versions = ">=3.7" [package.dependencies] attrs = "*" @@ -1625,7 +1625,7 @@ url_preview = ["lxml"] [metadata] lock-version = "1.1" python-versions = "^3.7.1" -content-hash = "79cfa09d59f9f8b5ef24318fb860df1915f54328692aa56d04331ecbdd92a8cb" +content-hash = "1b14fc274d9e2a495a7f864150f3ffcf4d9f585e09a67e53301ae4ef3c2f3e48" [metadata.files] attrs = [ @@ -2113,8 +2113,8 @@ markupsafe = [ {file = "MarkupSafe-2.1.0.tar.gz", hash = "sha256:80beaf63ddfbc64a0452b841d8036ca0611e049650e20afcb882f5d3c266d65f"}, ] matrix-common = [ - {file = "matrix_common-1.2.1-py3-none-any.whl", hash = "sha256:946709c405944a0d4b1d73207b77eb064b6dbfc5d70a69471320b06d8ce98b20"}, - {file = "matrix_common-1.2.1.tar.gz", hash = "sha256:a99dcf02a6bd95b24a5a61b354888a2ac92bf2b4b839c727b8dd9da2cdfa3853"}, + {file = "matrix_common-1.3.0-py3-none-any.whl", hash = "sha256:524e2785b9b03be4d15f3a8a6b857c5b6af68791ffb1b9918f0ad299abc4db20"}, + {file = "matrix_common-1.3.0.tar.gz", hash = "sha256:62e121cccd9f243417b57ec37a76dc44aeb198a7a5c67afd6b8275992ff2abd1"}, ] matrix-synapse-ldap3 = [ {file = "matrix-synapse-ldap3-0.2.2.tar.gz", hash = "sha256:b388d95693486eef69adaefd0fd9e84463d52fe17b0214a00efcaa669b73cb74"}, diff --git a/pyproject.toml b/pyproject.toml index 157385ad8a..8e50dd2852 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -164,7 +164,7 @@ typing-extensions = ">=3.10.0.1" cryptography = ">=3.4.7" # ijson 3.1.4 fixes a bug with "." in property names ijson = ">=3.1.4" -matrix-common = "^1.2.1" +matrix-common = "^1.3.0" # We need packaging.requirements.Requirement, added in 16.1. 
packaging = ">=16.1" # At the time of writing, we only use functions from the version `importlib.metadata` diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index 9dd3c8d4bb..328c0c5477 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -19,6 +19,8 @@ import shutil from io import BytesIO from typing import IO, TYPE_CHECKING, Dict, List, Optional, Set, Tuple +from matrix_common.types.mxc_uri import MXCUri + import twisted.internet.error import twisted.web.http from twisted.internet.defer import Deferred @@ -186,7 +188,7 @@ class MediaRepository: content: IO, content_length: int, auth_user: UserID, - ) -> str: + ) -> MXCUri: """Store uploaded content for a local user and return the mxc URL Args: @@ -219,7 +221,7 @@ class MediaRepository: await self._generate_thumbnails(None, media_id, media_id, media_type) - return "mxc://%s/%s" % (self.server_name, media_id) + return MXCUri(self.server_name, media_id) async def get_local_media( self, request: SynapseRequest, media_id: str, name: Optional[str] diff --git a/synapse/rest/media/v1/upload_resource.py b/synapse/rest/media/v1/upload_resource.py index e73e431dc9..97548b54e5 100644 --- a/synapse/rest/media/v1/upload_resource.py +++ b/synapse/rest/media/v1/upload_resource.py @@ -101,6 +101,8 @@ class UploadResource(DirectServeJsonResource): # the default 404, as that would just be confusing. raise SynapseError(400, "Bad content") - logger.info("Uploaded content with URI %r", content_uri) + logger.info("Uploaded content with URI '%s'", content_uri) - respond_with_json(request, 200, {"content_uri": content_uri}, send_cors=True) + respond_with_json( + request, 200, {"content_uri": str(content_uri)}, send_cors=True + ) diff --git a/tests/rest/media/test_media_retention.py b/tests/rest/media/test_media_retention.py index 14af07c5af..23f227aed6 100644 --- a/tests/rest/media/test_media_retention.py +++ b/tests/rest/media/test_media_retention.py @@ -13,7 +13,9 @@ # limitations under the License. 
import io -from typing import Iterable, Optional, Tuple +from typing import Iterable, Optional + +from matrix_common.types.mxc_uri import MXCUri from twisted.test.proto_helpers import MemoryReactor @@ -63,9 +65,9 @@ class MediaRetentionTestCase(unittest.HomeserverTestCase): last_accessed_ms: Optional[int], is_quarantined: Optional[bool] = False, is_protected: Optional[bool] = False, - ) -> str: + ) -> MXCUri: # "Upload" some media to the local media store - mxc_uri = self.get_success( + mxc_uri: MXCUri = self.get_success( media_repository.create_content( media_type="text/plain", upload_name=None, @@ -75,13 +77,11 @@ class MediaRetentionTestCase(unittest.HomeserverTestCase): ) ) - media_id = mxc_uri.split("/")[-1] - # Set the last recently accessed time for this media if last_accessed_ms is not None: self.get_success( self.store.update_cached_last_access_time( - local_media=(media_id,), + local_media=(mxc_uri.media_id,), remote_media=(), time_ms=last_accessed_ms, ) @@ -92,7 +92,7 @@ class MediaRetentionTestCase(unittest.HomeserverTestCase): self.get_success( self.store.quarantine_media_by_id( server_name=self.hs.config.server.server_name, - media_id=media_id, + media_id=mxc_uri.media_id, quarantined_by="@theadmin:test", ) ) @@ -101,18 +101,18 @@ class MediaRetentionTestCase(unittest.HomeserverTestCase): # Mark this media as protected from quarantine self.get_success( self.store.mark_local_media_as_safe( - media_id=media_id, + media_id=mxc_uri.media_id, safe=True, ) ) - return media_id + return mxc_uri def _cache_remote_media_and_set_attributes( media_id: str, last_accessed_ms: Optional[int], is_quarantined: Optional[bool] = False, - ) -> str: + ) -> MXCUri: # Pretend to cache some remote media self.get_success( self.store.store_cached_remote_media( @@ -146,7 +146,7 @@ class MediaRetentionTestCase(unittest.HomeserverTestCase): ) ) - return media_id + return MXCUri(self.remote_server_name, media_id) # Start with the local media store self.local_recently_accessed_media = _create_media_and_set_attributes( @@ -214,28 +214,16 @@ class MediaRetentionTestCase(unittest.HomeserverTestCase): # Remote media should be unaffected. self._assert_if_mxc_uris_purged( purged=[ - ( - self.hs.config.server.server_name, - self.local_not_recently_accessed_media, - ), - (self.hs.config.server.server_name, self.local_never_accessed_media), + self.local_not_recently_accessed_media, + self.local_never_accessed_media, ], not_purged=[ - (self.hs.config.server.server_name, self.local_recently_accessed_media), - ( - self.hs.config.server.server_name, - self.local_not_recently_accessed_quarantined_media, - ), - ( - self.hs.config.server.server_name, - self.local_not_recently_accessed_protected_media, - ), - (self.remote_server_name, self.remote_recently_accessed_media), - (self.remote_server_name, self.remote_not_recently_accessed_media), - ( - self.remote_server_name, - self.remote_not_recently_accessed_quarantined_media, - ), + self.local_recently_accessed_media, + self.local_not_recently_accessed_quarantined_media, + self.local_not_recently_accessed_protected_media, + self.remote_recently_accessed_media, + self.remote_not_recently_accessed_media, + self.remote_not_recently_accessed_quarantined_media, ], ) @@ -261,49 +249,35 @@ class MediaRetentionTestCase(unittest.HomeserverTestCase): # Remote media accessed <30 days ago should still exist. 
self._assert_if_mxc_uris_purged( purged=[ - (self.remote_server_name, self.remote_not_recently_accessed_media), + self.remote_not_recently_accessed_media, ], not_purged=[ - (self.remote_server_name, self.remote_recently_accessed_media), - (self.hs.config.server.server_name, self.local_recently_accessed_media), - ( - self.hs.config.server.server_name, - self.local_not_recently_accessed_media, - ), - ( - self.hs.config.server.server_name, - self.local_not_recently_accessed_quarantined_media, - ), - ( - self.hs.config.server.server_name, - self.local_not_recently_accessed_protected_media, - ), - ( - self.remote_server_name, - self.remote_not_recently_accessed_quarantined_media, - ), - (self.hs.config.server.server_name, self.local_never_accessed_media), + self.remote_recently_accessed_media, + self.local_recently_accessed_media, + self.local_not_recently_accessed_media, + self.local_not_recently_accessed_quarantined_media, + self.local_not_recently_accessed_protected_media, + self.remote_not_recently_accessed_quarantined_media, + self.local_never_accessed_media, ], ) def _assert_if_mxc_uris_purged( - self, purged: Iterable[Tuple[str, str]], not_purged: Iterable[Tuple[str, str]] + self, purged: Iterable[MXCUri], not_purged: Iterable[MXCUri] ) -> None: - def _assert_mxc_uri_purge_state( - server_name: str, media_id: str, expect_purged: bool - ) -> None: + def _assert_mxc_uri_purge_state(mxc_uri: MXCUri, expect_purged: bool) -> None: """Given an MXC URI, assert whether it has been purged or not.""" - if server_name == self.hs.config.server.server_name: + if mxc_uri.server_name == self.hs.config.server.server_name: found_media_dict = self.get_success( - self.store.get_local_media(media_id) + self.store.get_local_media(mxc_uri.media_id) ) else: found_media_dict = self.get_success( - self.store.get_cached_remote_media(server_name, media_id) + self.store.get_cached_remote_media( + mxc_uri.server_name, mxc_uri.media_id + ) ) - mxc_uri = f"mxc://{server_name}/{media_id}" - if expect_purged: self.assertIsNone( found_media_dict, msg=f"{mxc_uri} unexpectedly not purged" @@ -315,7 +289,7 @@ class MediaRetentionTestCase(unittest.HomeserverTestCase): ) # Assert that the given MXC URIs have either been correctly purged or not. 
- for server_name, media_id in purged: - _assert_mxc_uri_purge_state(server_name, media_id, expect_purged=True) - for server_name, media_id in not_purged: - _assert_mxc_uri_purge_state(server_name, media_id, expect_purged=False) + for mxc_uri in purged: + _assert_mxc_uri_purge_state(mxc_uri, expect_purged=True) + for mxc_uri in not_purged: + _assert_mxc_uri_purge_state(mxc_uri, expect_purged=False) -- cgit 1.5.1 From 742f9f9d78490f7f16bdb607a8f61ca258d520ef Mon Sep 17 00:00:00 2001 From: David Robertson Date: Thu, 15 Sep 2022 18:36:02 +0100 Subject: A third batch of Pydantic validation for rest/client/account.py (#13736) --- changelog.d/13736.feature | 1 + synapse/rest/client/account.py | 65 ++++++++++++++++++++++------------------ synapse/rest/client/models.py | 28 +++++++++-------- tests/rest/client/test_models.py | 29 ++++++++++++++++-- 4 files changed, 78 insertions(+), 45 deletions(-) create mode 100644 changelog.d/13736.feature (limited to 'synapse') diff --git a/changelog.d/13736.feature b/changelog.d/13736.feature new file mode 100644 index 0000000000..60a63c1009 --- /dev/null +++ b/changelog.d/13736.feature @@ -0,0 +1 @@ +Improve validation of request bodies for the following client-server API endpoints: [`/account/3pid/add`](https://spec.matrix.org/v1.3/client-server-api/#post_matrixclientv3account3pidadd), [`/account/3pid/bind`](https://spec.matrix.org/v1.3/client-server-api/#post_matrixclientv3account3pidbind), [`/account/3pid/delete`](https://spec.matrix.org/v1.3/client-server-api/#post_matrixclientv3account3piddelete) and [`/account/3pid/unbind`](https://spec.matrix.org/v1.3/client-server-api/#post_matrixclientv3account3pidunbind). diff --git a/synapse/rest/client/account.py b/synapse/rest/client/account.py index a09aaf3448..2db2a04f95 100644 --- a/synapse/rest/client/account.py +++ b/synapse/rest/client/account.py @@ -19,6 +19,7 @@ from typing import TYPE_CHECKING, List, Optional, Tuple from urllib.parse import urlparse from pydantic import StrictBool, StrictStr, constr +from typing_extensions import Literal from twisted.web.server import Request @@ -43,6 +44,7 @@ from synapse.metrics import threepid_send_requests from synapse.push.mailer import Mailer from synapse.rest.client.models import ( AuthenticationData, + ClientSecretStr, EmailRequestTokenBody, MsisdnRequestTokenBody, ) @@ -627,6 +629,11 @@ class ThreepidAddRestServlet(RestServlet): self.auth = hs.get_auth() self.auth_handler = hs.get_auth_handler() + class PostBody(RequestBodyModel): + auth: Optional[AuthenticationData] = None + client_secret: ClientSecretStr + sid: StrictStr + @interactive_auth_handler async def on_POST(self, request: SynapseRequest) -> Tuple[int, JsonDict]: if not self.hs.config.registration.enable_3pid_changes: @@ -636,22 +643,17 @@ class ThreepidAddRestServlet(RestServlet): requester = await self.auth.get_user_by_req(request) user_id = requester.user.to_string() - body = parse_json_object_from_request(request) - - assert_params_in_dict(body, ["client_secret", "sid"]) - sid = body["sid"] - client_secret = body["client_secret"] - assert_valid_client_secret(client_secret) + body = parse_and_validate_json_object_from_request(request, self.PostBody) await self.auth_handler.validate_user_via_ui_auth( requester, request, - body, + body.dict(exclude_unset=True), "add a third-party identifier to your account", ) validation_session = await self.identity_handler.validate_threepid_session( - client_secret, sid + body.client_secret, body.sid ) if validation_session: await self.auth_handler.add_threepid( @@ 
-676,23 +678,20 @@ class ThreepidBindRestServlet(RestServlet): self.identity_handler = hs.get_identity_handler() self.auth = hs.get_auth() - async def on_POST(self, request: SynapseRequest) -> Tuple[int, JsonDict]: - body = parse_json_object_from_request(request) + class PostBody(RequestBodyModel): + client_secret: ClientSecretStr + id_access_token: StrictStr + id_server: StrictStr + sid: StrictStr - assert_params_in_dict( - body, ["id_server", "sid", "id_access_token", "client_secret"] - ) - id_server = body["id_server"] - sid = body["sid"] - id_access_token = body["id_access_token"] - client_secret = body["client_secret"] - assert_valid_client_secret(client_secret) + async def on_POST(self, request: SynapseRequest) -> Tuple[int, JsonDict]: + body = parse_and_validate_json_object_from_request(request, self.PostBody) requester = await self.auth.get_user_by_req(request) user_id = requester.user.to_string() await self.identity_handler.bind_threepid( - client_secret, sid, user_id, id_server, id_access_token + body.client_secret, body.sid, user_id, body.id_server, body.id_access_token ) return 200, {} @@ -708,23 +707,27 @@ class ThreepidUnbindRestServlet(RestServlet): self.auth = hs.get_auth() self.datastore = self.hs.get_datastores().main + class PostBody(RequestBodyModel): + address: StrictStr + id_server: Optional[StrictStr] = None + medium: Literal["email", "msisdn"] + async def on_POST(self, request: SynapseRequest) -> Tuple[int, JsonDict]: """Unbind the given 3pid from a specific identity server, or identity servers that are known to have this 3pid bound """ requester = await self.auth.get_user_by_req(request) - body = parse_json_object_from_request(request) - assert_params_in_dict(body, ["medium", "address"]) - - medium = body.get("medium") - address = body.get("address") - id_server = body.get("id_server") + body = parse_and_validate_json_object_from_request(request, self.PostBody) # Attempt to unbind the threepid from an identity server. If id_server is None, try to # unbind from all identity servers this threepid has been added to in the past result = await self.identity_handler.try_unbind_threepid( requester.user.to_string(), - {"address": address, "medium": medium, "id_server": id_server}, + { + "address": body.address, + "medium": body.medium, + "id_server": body.id_server, + }, ) return 200, {"id_server_unbind_result": "success" if result else "no-support"} @@ -738,21 +741,25 @@ class ThreepidDeleteRestServlet(RestServlet): self.auth = hs.get_auth() self.auth_handler = hs.get_auth_handler() + class PostBody(RequestBodyModel): + address: StrictStr + id_server: Optional[StrictStr] = None + medium: Literal["email", "msisdn"] + async def on_POST(self, request: SynapseRequest) -> Tuple[int, JsonDict]: if not self.hs.config.registration.enable_3pid_changes: raise SynapseError( 400, "3PID changes are disabled on this server", Codes.FORBIDDEN ) - body = parse_json_object_from_request(request) - assert_params_in_dict(body, ["medium", "address"]) + body = parse_and_validate_json_object_from_request(request, self.PostBody) requester = await self.auth.get_user_by_req(request) user_id = requester.user.to_string() try: ret = await self.auth_handler.delete_threepid( - user_id, body["medium"], body["address"], body.get("id_server") + user_id, body.medium, body.address, body.id_server ) except Exception: # NB. 
This endpoint should succeed if there is nothing to diff --git a/synapse/rest/client/models.py b/synapse/rest/client/models.py index 6278450c70..3d7940b0fc 100644 --- a/synapse/rest/client/models.py +++ b/synapse/rest/client/models.py @@ -36,18 +36,20 @@ class AuthenticationData(RequestBodyModel): type: Optional[StrictStr] = None -class ThreePidRequestTokenBody(RequestBodyModel): - if TYPE_CHECKING: - client_secret: StrictStr - else: - # See also assert_valid_client_secret() - client_secret: constr( - regex="[0-9a-zA-Z.=_-]", # noqa: F722 - min_length=0, - max_length=255, - strict=True, - ) +if TYPE_CHECKING: + ClientSecretStr = StrictStr +else: + # See also assert_valid_client_secret() + ClientSecretStr = constr( + regex="[0-9a-zA-Z.=_-]", # noqa: F722 + min_length=1, + max_length=255, + strict=True, + ) + +class ThreepidRequestTokenBody(RequestBodyModel): + client_secret: ClientSecretStr id_server: Optional[StrictStr] id_access_token: Optional[StrictStr] next_link: Optional[StrictStr] @@ -62,7 +64,7 @@ class ThreePidRequestTokenBody(RequestBodyModel): return token -class EmailRequestTokenBody(ThreePidRequestTokenBody): +class EmailRequestTokenBody(ThreepidRequestTokenBody): email: StrictStr # Canonicalise the email address. The addresses are all stored canonicalised @@ -80,6 +82,6 @@ else: ISO3116_1_Alpha_2 = constr(regex="[A-Z]{2}", strict=True) -class MsisdnRequestTokenBody(ThreePidRequestTokenBody): +class MsisdnRequestTokenBody(ThreepidRequestTokenBody): country: ISO3116_1_Alpha_2 phone_number: StrictStr diff --git a/tests/rest/client/test_models.py b/tests/rest/client/test_models.py index a9da00665e..0b8fcb0c47 100644 --- a/tests/rest/client/test_models.py +++ b/tests/rest/client/test_models.py @@ -11,14 +11,37 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -import unittest +import unittest as stdlib_unittest -from pydantic import ValidationError +from pydantic import BaseModel, ValidationError +from typing_extensions import Literal from synapse.rest.client.models import EmailRequestTokenBody -class EmailRequestTokenBodyTestCase(unittest.TestCase): +class ThreepidMediumEnumTestCase(stdlib_unittest.TestCase): + class Model(BaseModel): + medium: Literal["email", "msisdn"] + + def test_accepts_valid_medium_string(self) -> None: + """Sanity check that Pydantic behaves sensibly with an enum-of-str + + This is arguably more of a test of a class that inherits from str and Enum + simultaneously. + """ + model = self.Model.parse_obj({"medium": "email"}) + self.assertEqual(model.medium, "email") + + def test_rejects_invalid_medium_value(self) -> None: + with self.assertRaises(ValidationError): + self.Model.parse_obj({"medium": "interpretive_dance"}) + + def test_rejects_invalid_medium_type(self) -> None: + with self.assertRaises(ValidationError): + self.Model.parse_obj({"medium": 123}) + + +class EmailRequestTokenBodyTestCase(stdlib_unittest.TestCase): base_request = { "client_secret": "hunter2", "email": "alice@wonderland.com", -- cgit 1.5.1 From b2b0c8527957d89b36c0eafea70347c200c1d294 Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Thu, 15 Sep 2022 14:28:48 -0400 Subject: Support providing an index predicate for upserts. (#13822) This is useful to upsert against a table which has a unique partial index while avoiding conflicts. 
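As an illustration of the new behaviour (not part of this patch): on PostgreSQL, `simple_upsert_txn_native_upsert` can now emit an `ON CONFLICT (...) WHERE ... DO UPDATE` statement whose conflict target is a partial unique index. Every table, column and predicate below is invented for the sketch; only the statement shape comes from the change.

```sql
-- Hypothetical table whose uniqueness is enforced by a partial index.
CREATE TABLE IF NOT EXISTS example_summary(
    room_id TEXT NOT NULL,
    user_id TEXT NOT NULL,
    thread_id TEXT,
    notif_count BIGINT NOT NULL
);
CREATE UNIQUE INDEX IF NOT EXISTS example_summary_unique_index
    ON example_summary (room_id, user_id) WHERE thread_id IS NULL;

-- With where_clause="thread_id IS NULL", the generated upsert names that
-- partial index as its conflict target; without the predicate, PostgreSQL
-- would reject the statement because no full unique constraint matches.
INSERT INTO example_summary (room_id, user_id, notif_count)
VALUES ('!room:example.test', '@alice:example.test', 1)
ON CONFLICT (room_id, user_id) WHERE thread_id IS NULL
DO UPDATE SET notif_count = EXCLUDED.notif_count;
```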
--- changelog.d/13822.misc | 1 + synapse/storage/background_updates.py | 1 + synapse/storage/database.py | 30 +++++++++++++++++++++++------- 3 files changed, 25 insertions(+), 7 deletions(-) create mode 100644 changelog.d/13822.misc (limited to 'synapse') diff --git a/changelog.d/13822.misc b/changelog.d/13822.misc new file mode 100644 index 0000000000..dbc77cbcfa --- /dev/null +++ b/changelog.d/13822.misc @@ -0,0 +1 @@ +Support providing an index predicate clause when doing upserts. diff --git a/synapse/storage/background_updates.py b/synapse/storage/background_updates.py index cf1eabc437..bf5e7ee7be 100644 --- a/synapse/storage/background_updates.py +++ b/synapse/storage/background_updates.py @@ -533,6 +533,7 @@ class BackgroundUpdater: index_name: name of index to add table: table to add index to columns: columns/expressions to include in index + where_clause: A WHERE clause to specify a partial unique index. unique: true to make a UNIQUE index psql_only: true to only create this index on psql databases (useful for virtual sqlite tables) diff --git a/synapse/storage/database.py b/synapse/storage/database.py index e881bff7fb..921cd4dc5e 100644 --- a/synapse/storage/database.py +++ b/synapse/storage/database.py @@ -1191,6 +1191,7 @@ class DatabasePool: keyvalues: Dict[str, Any], values: Dict[str, Any], insertion_values: Optional[Dict[str, Any]] = None, + where_clause: Optional[str] = None, lock: bool = True, ) -> bool: """ @@ -1203,6 +1204,7 @@ class DatabasePool: keyvalues: The unique key tables and their new values values: The nonunique columns and their new values insertion_values: additional key/values to use only when inserting + where_clause: An index predicate to apply to the upsert. lock: True to lock the table when doing the upsert. Unused when performing a native upsert. Returns: @@ -1213,7 +1215,12 @@ class DatabasePool: if table not in self._unsafe_to_upsert_tables: return self.simple_upsert_txn_native_upsert( - txn, table, keyvalues, values, insertion_values=insertion_values + txn, + table, + keyvalues, + values, + insertion_values=insertion_values, + where_clause=where_clause, ) else: return self.simple_upsert_txn_emulated( @@ -1222,6 +1229,7 @@ class DatabasePool: keyvalues, values, insertion_values=insertion_values, + where_clause=where_clause, lock=lock, ) @@ -1232,6 +1240,7 @@ class DatabasePool: keyvalues: Dict[str, Any], values: Dict[str, Any], insertion_values: Optional[Dict[str, Any]] = None, + where_clause: Optional[str] = None, lock: bool = True, ) -> bool: """ @@ -1240,6 +1249,7 @@ class DatabasePool: keyvalues: The unique key tables and their new values values: The nonunique columns and their new values insertion_values: additional key/values to use only when inserting + where_clause: An index predicate to apply to the upsert. lock: True to lock the table when doing the upsert. Returns: Returns True if a row was inserted or updated (i.e. if `values` is @@ -1259,14 +1269,17 @@ class DatabasePool: else: return "%s = ?" % (key,) + # Generate a where clause of each keyvalue and optionally the provided + # index predicate. + where = [_getwhere(k) for k in keyvalues] + if where_clause: + where.append(where_clause) + if not values: # If `values` is empty, then all of the values we care about are in # the unique key, so there is nothing to UPDATE. We can just do a # SELECT instead to see if it exists. 
- sql = "SELECT 1 FROM %s WHERE %s" % ( - table, - " AND ".join(_getwhere(k) for k in keyvalues), - ) + sql = "SELECT 1 FROM %s WHERE %s" % (table, " AND ".join(where)) sqlargs = list(keyvalues.values()) txn.execute(sql, sqlargs) if txn.fetchall(): @@ -1277,7 +1290,7 @@ class DatabasePool: sql = "UPDATE %s SET %s WHERE %s" % ( table, ", ".join("%s = ?" % (k,) for k in values), - " AND ".join(_getwhere(k) for k in keyvalues), + " AND ".join(where), ) sqlargs = list(values.values()) + list(keyvalues.values()) @@ -1307,6 +1320,7 @@ class DatabasePool: keyvalues: Dict[str, Any], values: Dict[str, Any], insertion_values: Optional[Dict[str, Any]] = None, + where_clause: Optional[str] = None, ) -> bool: """ Use the native UPSERT functionality in PostgreSQL. @@ -1316,6 +1330,7 @@ class DatabasePool: keyvalues: The unique key tables and their new values values: The nonunique columns and their new values insertion_values: additional key/values to use only when inserting + where_clause: An index predicate to apply to the upsert. Returns: Returns True if a row was inserted or updated (i.e. if `values` is @@ -1331,11 +1346,12 @@ class DatabasePool: allvalues.update(values) latter = "UPDATE SET " + ", ".join(k + "=EXCLUDED." + k for k in values) - sql = ("INSERT INTO %s (%s) VALUES (%s) ON CONFLICT (%s) DO %s") % ( + sql = "INSERT INTO %s (%s) VALUES (%s) ON CONFLICT (%s) %s DO %s" % ( table, ", ".join(k for k in allvalues), ", ".join("?" for _ in allvalues), ", ".join(k for k in keyvalues), + f"WHERE {where_clause}" if where_clause else "", latter, ) txn.execute(sql, list(allvalues.values())) -- cgit 1.5.1 From 140af0cdb653bc2fef9474af06a5c5b525073998 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Thu, 15 Sep 2022 14:40:49 -0500 Subject: Record any exception when processing a pulled event (#13814) Part of https://github.com/matrix-org/synapse/issues/13700 and https://github.com/matrix-org/synapse/issues/13356 Follow-up to https://github.com/matrix-org/synapse/pull/13589 --- changelog.d/13589.feature | 2 +- changelog.d/13814.feature | 1 + synapse/handlers/federation_event.py | 10 ++++++++++ 3 files changed, 12 insertions(+), 1 deletion(-) create mode 100644 changelog.d/13814.feature (limited to 'synapse') diff --git a/changelog.d/13589.feature b/changelog.d/13589.feature index 78fa1ddb52..a5ea2bc82e 100644 --- a/changelog.d/13589.feature +++ b/changelog.d/13589.feature @@ -1 +1 @@ -Keep track when we attempt to backfill an event but fail so we can intelligently back-off in the future. +Keep track when we fail to process a pulled event over federation so we can intelligently back-off in the future. diff --git a/changelog.d/13814.feature b/changelog.d/13814.feature new file mode 100644 index 0000000000..a5ea2bc82e --- /dev/null +++ b/changelog.d/13814.feature @@ -0,0 +1 @@ +Keep track when we fail to process a pulled event over federation so we can intelligently back-off in the future. 
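For context (not part of this patch): the failures recorded by this change land in the `event_failed_pull_attempts` table whose schema appears earlier in this log, so the back-off bookkeeping can be inspected with a query along these lines; the room ID is made up for the example.

```sql
-- Illustrative query only; the room ID is invented.
SELECT event_id, num_attempts, last_attempt_ts, last_cause
FROM event_failed_pull_attempts
WHERE room_id = '!room:example.test'
ORDER BY num_attempts DESC;
```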
diff --git a/synapse/handlers/federation_event.py b/synapse/handlers/federation_event.py index 9e065e1116..efcdb84057 100644 --- a/synapse/handlers/federation_event.py +++ b/synapse/handlers/federation_event.py @@ -866,6 +866,11 @@ class FederationEventHandler: event.room_id, event_id, str(err) ) return + except Exception as exc: + await self._store.record_event_failed_pull_attempt( + event.room_id, event_id, str(exc) + ) + raise exc try: try: @@ -908,6 +913,11 @@ class FederationEventHandler: logger.warning("Pulled event %s failed history check.", event_id) else: raise + except Exception as exc: + await self._store.record_event_failed_pull_attempt( + event.room_id, event_id, str(exc) + ) + raise exc @trace async def _compute_event_context_with_maybe_missing_prevs( -- cgit 1.5.1 From 5093cbf88da1c439f5bf16b7a4cf19246781bd93 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Thu, 15 Sep 2022 15:32:25 -0500 Subject: Be able to correlate timeouts in reverse-proxy layer in front of Synapse (pull request ID from header) (#13801) Fix https://github.com/matrix-org/synapse/issues/13685 New config: ```diff listeners: - port: 8008 tls: false type: http x_forwarded: true + request_id_header: "cf-ray" bind_addresses: ['::1', '127.0.0.1', '0.0.0.0'] ``` --- changelog.d/13801.feature | 1 + docs/reverse_proxy.md | 4 ++++ docs/usage/configuration/config_documentation.md | 11 ++++++++++- synapse/config/server.py | 13 ++++++++++--- synapse/http/site.py | 14 +++++++++++++- 5 files changed, 38 insertions(+), 5 deletions(-) create mode 100644 changelog.d/13801.feature (limited to 'synapse') diff --git a/changelog.d/13801.feature b/changelog.d/13801.feature new file mode 100644 index 0000000000..d7cedfd302 --- /dev/null +++ b/changelog.d/13801.feature @@ -0,0 +1 @@ +Add `listeners[x].request_id_header` config to specify which request header to extract and use as the request ID in order to correlate requests from a reverse-proxy. diff --git a/docs/reverse_proxy.md b/docs/reverse_proxy.md index d1618e8155..4e7a1d4435 100644 --- a/docs/reverse_proxy.md +++ b/docs/reverse_proxy.md @@ -45,6 +45,10 @@ listens to traffic on localhost. (Do not change `bind_addresses` to `127.0.0.1` when using a containerized Synapse, as that will prevent it from responding to proxied traffic.) +Optionally, you can also set +[`request_id_header`](../usage/configuration/config_documentation.md#listeners) +so that the server extracts and re-uses the same request ID format that the +reverse proxy is using. ## Reverse-proxy configuration examples diff --git a/docs/usage/configuration/config_documentation.md b/docs/usage/configuration/config_documentation.md index cd546041b2..69d305b62e 100644 --- a/docs/usage/configuration/config_documentation.md +++ b/docs/usage/configuration/config_documentation.md @@ -434,7 +434,16 @@ Sub-options for each listener include: * `tls`: set to true to enable TLS for this listener. Will use the TLS key/cert specified in tls_private_key_path / tls_certificate_path. * `x_forwarded`: Only valid for an 'http' listener. Set to true to use the X-Forwarded-For header as the client IP. Useful when Synapse is - behind a reverse-proxy. + behind a [reverse-proxy](../../reverse_proxy.md). + +* `request_id_header`: The header extracted from each incoming request that is + used as the basis for the request ID. The request ID is used in + [logs](../administration/request_log.md#request-log-format) and tracing to + correlate and match up requests. When unset, Synapse will automatically + generate sequential request IDs. 
This option is useful when Synapse is behind + a [reverse-proxy](../../reverse_proxy.md). + + _Added in Synapse 1.68.0._ * `resources`: Only valid for an 'http' listener. A list of resources to host on this port. Sub-options for each resource are: diff --git a/synapse/config/server.py b/synapse/config/server.py index c91df636d9..f2353ce5fb 100644 --- a/synapse/config/server.py +++ b/synapse/config/server.py @@ -206,6 +206,7 @@ class HttpListenerConfig: resources: List[HttpResourceConfig] = attr.Factory(list) additional_resources: Dict[str, dict] = attr.Factory(dict) tag: Optional[str] = None + request_id_header: Optional[str] = None @attr.s(slots=True, frozen=True, auto_attribs=True) @@ -520,9 +521,11 @@ class ServerConfig(Config): ): raise ConfigError("allowed_avatar_mimetypes must be a list") - self.listeners = [ - parse_listener_def(i, x) for i, x in enumerate(config.get("listeners", [])) - ] + listeners = config.get("listeners", []) + if not isinstance(listeners, list): + raise ConfigError("Expected a list", ("listeners",)) + + self.listeners = [parse_listener_def(i, x) for i, x in enumerate(listeners)] # no_tls is not really supported any more, but let's grandfather it in # here. @@ -889,6 +892,9 @@ def read_gc_thresholds( def parse_listener_def(num: int, listener: Any) -> ListenerConfig: """parse a listener config from the config file""" + if not isinstance(listener, dict): + raise ConfigError("Expected a dictionary", ("listeners", str(num))) + listener_type = listener["type"] # Raise a helpful error if direct TCP replication is still configured. if listener_type == "replication": @@ -928,6 +934,7 @@ def parse_listener_def(num: int, listener: Any) -> ListenerConfig: resources=resources, additional_resources=listener.get("additional_resources", {}), tag=listener.get("tag"), + request_id_header=listener.get("request_id_header"), ) return ListenerConfig(port, bind_addresses, listener_type, tls, http_config) diff --git a/synapse/http/site.py b/synapse/http/site.py index 1155f3f610..55a6afce35 100644 --- a/synapse/http/site.py +++ b/synapse/http/site.py @@ -72,10 +72,12 @@ class SynapseRequest(Request): site: "SynapseSite", *args: Any, max_request_body_size: int = 1024, + request_id_header: Optional[str] = None, **kw: Any, ): super().__init__(channel, *args, **kw) self._max_request_body_size = max_request_body_size + self.request_id_header = request_id_header self.synapse_site = site self.reactor = site.reactor self._channel = channel # this is used by the tests @@ -172,7 +174,14 @@ class SynapseRequest(Request): self._opentracing_span = span def get_request_id(self) -> str: - return "%s-%i" % (self.get_method(), self.request_seq) + request_id_value = None + if self.request_id_header: + request_id_value = self.getHeader(self.request_id_header) + + if request_id_value is None: + request_id_value = str(self.request_seq) + + return "%s-%s" % (self.get_method(), request_id_value) def get_redacted_uri(self) -> str: """Gets the redacted URI associated with the request (or placeholder if the URI @@ -611,12 +620,15 @@ class SynapseSite(Site): proxied = config.http_options.x_forwarded request_class = XForwardedForRequest if proxied else SynapseRequest + request_id_header = config.http_options.request_id_header + def request_factory(channel: HTTPChannel, queued: bool) -> Request: return request_class( channel, self, max_request_body_size=max_request_body_size, queued=queued, + request_id_header=request_id_header, ) self.requestFactory = request_factory # type: ignore -- cgit 1.5.1 From 
b73cbb82157d9666e8d667733afebc0d09ed858c Mon Sep 17 00:00:00 2001 From: Sean Quah <8349537+squahtx@users.noreply.github.com> Date: Fri, 16 Sep 2022 12:45:04 +0100 Subject: Avoid putting rejected events in room state (#13723) Signed-off-by: Sean Quah --- changelog.d/13723.bugfix | 1 + synapse/state/v2.py | 15 ++ tests/handlers/test_federation_event.py | 399 ++++++++++++++++++++++++++++++++ 3 files changed, 415 insertions(+) create mode 100644 changelog.d/13723.bugfix (limited to 'synapse') diff --git a/changelog.d/13723.bugfix b/changelog.d/13723.bugfix new file mode 100644 index 0000000000..a23174d31d --- /dev/null +++ b/changelog.d/13723.bugfix @@ -0,0 +1 @@ +Fix a long-standing bug where previously rejected events could end up in room state because they pass auth checks given the current state of the room. diff --git a/synapse/state/v2.py b/synapse/state/v2.py index af03851c71..1b9d7d8457 100644 --- a/synapse/state/v2.py +++ b/synapse/state/v2.py @@ -577,6 +577,21 @@ async def _iterative_auth_checks( if ev.rejected_reason is None: auth_events[key] = event_map[ev_id] + if event.rejected_reason is not None: + # Do not admit previously rejected events into state. + # TODO: This isn't spec compliant. Events that were previously rejected due + # to failing auth checks at their state, but pass auth checks during + # state resolution should be accepted. Synapse does not handle the + # change of rejection status well, so we preserve the previous + # rejection status for now. + # + # Note that events rejected for non-state reasons, such as having the + # wrong auth events, should remain rejected. + # + # https://spec.matrix.org/v1.2/rooms/v9/#rejected-events + # https://github.com/matrix-org/synapse/issues/13797 + continue + try: event_auth.check_state_dependent_auth_rules( event, diff --git a/tests/handlers/test_federation_event.py b/tests/handlers/test_federation_event.py index b5b89405a4..918010cddb 100644 --- a/tests/handlers/test_federation_event.py +++ b/tests/handlers/test_federation_event.py @@ -11,14 +11,23 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +from typing import Optional from unittest import mock +from synapse.api.errors import AuthError +from synapse.api.room_versions import RoomVersion +from synapse.event_auth import ( + check_state_dependent_auth_rules, + check_state_independent_auth_rules, +) from synapse.events import make_event_from_dict from synapse.events.snapshot import EventContext from synapse.federation.transport.client import StateRequestResponse from synapse.logging.context import LoggingContext from synapse.rest import admin from synapse.rest.client import login, room +from synapse.state.v2 import _mainline_sort, _reverse_topological_power_sort +from synapse.types import JsonDict from tests import unittest from tests.test_utils import event_injection, make_awaitable @@ -449,3 +458,393 @@ class FederationEventHandlerTests(unittest.FederatingHomeserverTestCase): main_store.get_event(pulled_event.event_id, allow_none=True) ) self.assertIsNotNone(persisted, "pulled event was not persisted at all") + + def test_process_pulled_event_with_rejected_missing_state(self) -> None: + """Ensure that we correctly handle pulled events with missing state containing a + rejected state event + + In this test, we pretend we are processing a "pulled" event (eg, via backfill + or get_missing_events). 
The pulled event has a prev_event we haven't previously + seen, so the server requests the state at that prev_event. We expect the server + to make a /state request. + + We simulate a remote server whose /state includes a rejected kick event for a + local user. Notably, the kick event is rejected only because it cites a rejected + auth event and would otherwise be accepted based on the room state. During state + resolution, we re-run auth and can potentially introduce such rejected events + into the state if we are not careful. + + We check that the pulled event is correctly persisted, and that the state + afterwards does not include the rejected kick. + """ + # The DAG we are testing looks like: + # + # ... + # | + # v + # remote admin user joins + # | | + # +-------+ +-------+ + # | | + # | rejected power levels + # | from remote server + # | | + # | v + # | rejected kick of local user + # v from remote server + # new power levels | + # | v + # | missing event + # | from remote server + # | | + # +-------+ +-------+ + # | | + # v v + # pulled event + # from remote server + # + # (arrows are in the opposite direction to prev_events.) + + OTHER_USER = f"@user:{self.OTHER_SERVER_NAME}" + main_store = self.hs.get_datastores().main + + # Create the room. + kermit_user_id = self.register_user("kermit", "test") + kermit_tok = self.login("kermit", "test") + room_id = self.helper.create_room_as( + room_creator=kermit_user_id, tok=kermit_tok + ) + room_version = self.get_success(main_store.get_room_version(room_id)) + + # Add another local user to the room. This user is going to be kicked in a + # rejected event. + bert_user_id = self.register_user("bert", "test") + bert_tok = self.login("bert", "test") + self.helper.join(room_id, user=bert_user_id, tok=bert_tok) + + # Allow the remote user to kick bert. + # The remote user is going to send a rejected power levels event later on and we + # need state resolution to order it before another power levels event kermit is + # going to send later on. Hence we give both users the same power level, so that + # ties are broken by `origin_server_ts`. + self.helper.send_state( + room_id, + "m.room.power_levels", + {"users": {kermit_user_id: 100, OTHER_USER: 100}}, + tok=kermit_tok, + ) + + # Add the remote user to the room. + other_member_event = self.get_success( + event_injection.inject_member_event(self.hs, room_id, OTHER_USER, "join") + ) + + initial_state_map = self.get_success( + main_store.get_partial_current_state_ids(room_id) + ) + create_event = self.get_success( + main_store.get_event(initial_state_map[("m.room.create", "")]) + ) + bert_member_event = self.get_success( + main_store.get_event(initial_state_map[("m.room.member", bert_user_id)]) + ) + power_levels_event = self.get_success( + main_store.get_event(initial_state_map[("m.room.power_levels", "")]) + ) + + # We now need a rejected state event that will fail + # `check_state_independent_auth_rules` but pass + # `check_state_dependent_auth_rules`. + + # First, we create a power levels event that we pretend the remote server has + # accepted, but the local homeserver will reject. 
+ next_depth = 100 + next_timestamp = other_member_event.origin_server_ts + 100 + rejected_power_levels_event = make_event_from_dict( + self.add_hashes_and_signatures_from_other_server( + { + "type": "m.room.power_levels", + "state_key": "", + "room_id": room_id, + "sender": OTHER_USER, + "prev_events": [other_member_event.event_id], + "auth_events": [ + initial_state_map[("m.room.create", "")], + initial_state_map[("m.room.power_levels", "")], + # The event will be rejected because of the duplicated auth + # event. + other_member_event.event_id, + other_member_event.event_id, + ], + "origin_server_ts": next_timestamp, + "depth": next_depth, + "content": power_levels_event.content, + } + ), + room_version, + ) + next_depth += 1 + next_timestamp += 100 + + with LoggingContext("send_rejected_power_levels_event"): + self.get_success( + self.hs.get_federation_event_handler()._process_pulled_event( + self.OTHER_SERVER_NAME, + rejected_power_levels_event, + backfilled=False, + ) + ) + self.assertEqual( + self.get_success( + main_store.get_rejection_reason( + rejected_power_levels_event.event_id + ) + ), + "auth_error", + ) + + # Then we create a kick event for a local user that cites the rejected power + # levels event in its auth events. The kick event will be rejected solely + # because of the rejected auth event and would otherwise be accepted. + rejected_kick_event = make_event_from_dict( + self.add_hashes_and_signatures_from_other_server( + { + "type": "m.room.member", + "state_key": bert_user_id, + "room_id": room_id, + "sender": OTHER_USER, + "prev_events": [rejected_power_levels_event.event_id], + "auth_events": [ + initial_state_map[("m.room.create", "")], + rejected_power_levels_event.event_id, + initial_state_map[("m.room.member", bert_user_id)], + initial_state_map[("m.room.member", OTHER_USER)], + ], + "origin_server_ts": next_timestamp, + "depth": next_depth, + "content": {"membership": "leave"}, + } + ), + room_version, + ) + next_depth += 1 + next_timestamp += 100 + + # The kick event must fail the state-independent auth rules, but pass the + # state-dependent auth rules, so that it has a chance of making it through state + # resolution. + self.get_failure( + check_state_independent_auth_rules(main_store, rejected_kick_event), + AuthError, + ) + check_state_dependent_auth_rules( + rejected_kick_event, + [create_event, power_levels_event, other_member_event, bert_member_event], + ) + + # The kick event must also win over the original member event during state + # resolution. + self.assertEqual( + self.get_success( + _mainline_sort( + self.clock, + room_id, + event_ids=[ + bert_member_event.event_id, + rejected_kick_event.event_id, + ], + resolved_power_event_id=power_levels_event.event_id, + event_map={ + bert_member_event.event_id: bert_member_event, + rejected_kick_event.event_id: rejected_kick_event, + }, + state_res_store=main_store, + ) + ), + [bert_member_event.event_id, rejected_kick_event.event_id], + "The rejected kick event will not be applied after bert's join event " + "during state resolution. 
The test setup is incorrect.", + ) + + with LoggingContext("send_rejected_kick_event"): + self.get_success( + self.hs.get_federation_event_handler()._process_pulled_event( + self.OTHER_SERVER_NAME, rejected_kick_event, backfilled=False + ) + ) + self.assertEqual( + self.get_success( + main_store.get_rejection_reason(rejected_kick_event.event_id) + ), + "auth_error", + ) + + # We need another power levels event which will win over the rejected one during + # state resolution, otherwise we hit other issues where we end up with rejected + # a power levels event during state resolution. + self.reactor.advance(100) # ensure the `origin_server_ts` is larger + new_power_levels_event = self.get_success( + main_store.get_event( + self.helper.send_state( + room_id, + "m.room.power_levels", + {"users": {kermit_user_id: 100, OTHER_USER: 100, bert_user_id: 1}}, + tok=kermit_tok, + )["event_id"] + ) + ) + self.assertEqual( + self.get_success( + _reverse_topological_power_sort( + self.clock, + room_id, + event_ids=[ + new_power_levels_event.event_id, + rejected_power_levels_event.event_id, + ], + event_map={}, + state_res_store=main_store, + full_conflicted_set=set(), + ) + ), + [rejected_power_levels_event.event_id, new_power_levels_event.event_id], + "The power levels events will not have the desired ordering during state " + "resolution. The test setup is incorrect.", + ) + + # Create a missing event, so that the local homeserver has to do a `/state` or + # `/state_ids` request to pull state from the remote homeserver. + missing_event = make_event_from_dict( + self.add_hashes_and_signatures_from_other_server( + { + "type": "m.room.message", + "room_id": room_id, + "sender": OTHER_USER, + "prev_events": [rejected_kick_event.event_id], + "auth_events": [ + initial_state_map[("m.room.create", "")], + initial_state_map[("m.room.power_levels", "")], + initial_state_map[("m.room.member", OTHER_USER)], + ], + "origin_server_ts": next_timestamp, + "depth": next_depth, + "content": {"msgtype": "m.text", "body": "foo"}, + } + ), + room_version, + ) + next_depth += 1 + next_timestamp += 100 + + # The pulled event has two prev events, one of which is missing. We will make a + # `/state` or `/state_ids` request to the remote homeserver to ask it for the + # state before the missing prev event. + pulled_event = make_event_from_dict( + self.add_hashes_and_signatures_from_other_server( + { + "type": "m.room.message", + "room_id": room_id, + "sender": OTHER_USER, + "prev_events": [ + new_power_levels_event.event_id, + missing_event.event_id, + ], + "auth_events": [ + initial_state_map[("m.room.create", "")], + new_power_levels_event.event_id, + initial_state_map[("m.room.member", OTHER_USER)], + ], + "origin_server_ts": next_timestamp, + "depth": next_depth, + "content": {"msgtype": "m.text", "body": "bar"}, + } + ), + room_version, + ) + next_depth += 1 + next_timestamp += 100 + + # Prepare the response for the `/state` or `/state_ids` request. + # The remote server believes bert has been kicked, while the local server does + # not. 
+ state_before_missing_event = self.get_success( + main_store.get_events_as_list(initial_state_map.values()) + ) + state_before_missing_event = [ + event + for event in state_before_missing_event + if event.event_id != bert_member_event.event_id + ] + state_before_missing_event.append(rejected_kick_event) + + # We have to bump the clock a bit, to keep the retry logic in + # `FederationClient.get_pdu` happy + self.reactor.advance(60000) + with LoggingContext("send_pulled_event"): + + async def get_event( + destination: str, event_id: str, timeout: Optional[int] = None + ) -> JsonDict: + self.assertEqual(destination, self.OTHER_SERVER_NAME) + self.assertEqual(event_id, missing_event.event_id) + return {"pdus": [missing_event.get_pdu_json()]} + + async def get_room_state_ids( + destination: str, room_id: str, event_id: str + ) -> JsonDict: + self.assertEqual(destination, self.OTHER_SERVER_NAME) + self.assertEqual(event_id, missing_event.event_id) + return { + "pdu_ids": [event.event_id for event in state_before_missing_event], + "auth_chain_ids": [], + } + + async def get_room_state( + room_version: RoomVersion, destination: str, room_id: str, event_id: str + ) -> StateRequestResponse: + self.assertEqual(destination, self.OTHER_SERVER_NAME) + self.assertEqual(event_id, missing_event.event_id) + return StateRequestResponse( + state=state_before_missing_event, + auth_events=[], + ) + + self.mock_federation_transport_client.get_event.side_effect = get_event + self.mock_federation_transport_client.get_room_state_ids.side_effect = ( + get_room_state_ids + ) + self.mock_federation_transport_client.get_room_state.side_effect = ( + get_room_state + ) + + self.get_success( + self.hs.get_federation_event_handler()._process_pulled_event( + self.OTHER_SERVER_NAME, pulled_event, backfilled=False + ) + ) + self.assertIsNone( + self.get_success( + main_store.get_rejection_reason(pulled_event.event_id) + ), + "Pulled event was unexpectedly rejected, likely due to a problem with " + "the test setup.", + ) + self.assertEqual( + {pulled_event.event_id}, + self.get_success( + main_store.have_events_in_timeline([pulled_event.event_id]) + ), + "Pulled event was not persisted, likely due to a problem with the test " + "setup.", + ) + + # We must not accept rejected events into the room state, so we expect bert + # to not be kicked, even if the remote server believes so. + new_state_map = self.get_success( + main_store.get_partial_current_state_ids(room_id) + ) + self.assertEqual( + new_state_map[("m.room.member", bert_user_id)], + bert_member_event.event_id, + "Rejected kick event unexpectedly became part of room state.", + ) -- cgit 1.5.1 From 74f60cec92c5aff87d6e74d177e95ec5f1a69f2b Mon Sep 17 00:00:00 2001 From: Quentin Gliech Date: Fri, 16 Sep 2022 14:29:03 +0200 Subject: Add an admin API endpoint to find a user based on its external ID in an auth provider. (#13810) --- changelog.d/13810.feature | 1 + docs/admin_api/user_admin_api.md | 38 ++++++++++++++++++ synapse/rest/admin/__init__.py | 2 + synapse/rest/admin/users.py | 27 +++++++++++++ tests/rest/admin/test_user.py | 87 ++++++++++++++++++++++++++++++++++++++++ 5 files changed, 155 insertions(+) create mode 100644 changelog.d/13810.feature (limited to 'synapse') diff --git a/changelog.d/13810.feature b/changelog.d/13810.feature new file mode 100644 index 0000000000..f0258af661 --- /dev/null +++ b/changelog.d/13810.feature @@ -0,0 +1 @@ +Add an admin API endpoint to find a user based on its external ID in an auth provider. 
diff --git a/docs/admin_api/user_admin_api.md b/docs/admin_api/user_admin_api.md index 975f05c929..3625c7b6c5 100644 --- a/docs/admin_api/user_admin_api.md +++ b/docs/admin_api/user_admin_api.md @@ -1155,3 +1155,41 @@ GET /_synapse/admin/v1/username_available?username=$localpart The request and response format is the same as the [/_matrix/client/r0/register/available](https://matrix.org/docs/spec/client_server/r0.6.0#get-matrix-client-r0-register-available) API. + +### Find a user based on their ID in an auth provider + +The API is: + +``` +GET /_synapse/admin/v1/auth_providers/$provider/users/$external_id +``` + +When a user matched the given ID for the given provider, an HTTP code `200` with a response body like the following is returned: + +```json +{ + "user_id": "@hello:example.org" +} +``` + +**Parameters** + +The following parameters should be set in the URL: + +- `provider` - The ID of the authentication provider, as advertised by the [`GET /_matrix/client/v3/login`](https://spec.matrix.org/latest/client-server-api/#post_matrixclientv3login) API in the `m.login.sso` authentication method. +- `external_id` - The user ID from the authentication provider. Usually corresponds to the `sub` claim for OIDC providers, or to the `uid` attestation for SAML2 providers. + +The `external_id` may have characters that are not URL-safe (typically `/`, `:` or `@`), so it is advised to URL-encode those parameters. + +**Errors** + +Returns a `404` HTTP status code if no user was found, with a response body like this: + +```json +{ + "errcode":"M_NOT_FOUND", + "error":"User not found" +} +``` + +_Added in Synapse 1.68.0._ diff --git a/synapse/rest/admin/__init__.py b/synapse/rest/admin/__init__.py index bac754e1b1..885669f9c7 100644 --- a/synapse/rest/admin/__init__.py +++ b/synapse/rest/admin/__init__.py @@ -80,6 +80,7 @@ from synapse.rest.admin.users import ( SearchUsersRestServlet, ShadowBanRestServlet, UserAdminServlet, + UserByExternalId, UserMembershipRestServlet, UserRegisterServlet, UserRestServletV2, @@ -275,6 +276,7 @@ def register_servlets(hs: "HomeServer", http_server: HttpServer) -> None: ListDestinationsRestServlet(hs).register(http_server) RoomMessagesRestServlet(hs).register(http_server) RoomTimestampToEventRestServlet(hs).register(http_server) + UserByExternalId(hs).register(http_server) # Some servlets only get registered for the main process. 
if hs.config.worker.worker_app is None: diff --git a/synapse/rest/admin/users.py b/synapse/rest/admin/users.py index 78ee9b6532..2ca6b2d08a 100644 --- a/synapse/rest/admin/users.py +++ b/synapse/rest/admin/users.py @@ -1156,3 +1156,30 @@ class AccountDataRestServlet(RestServlet): "rooms": by_room_data, }, } + + +class UserByExternalId(RestServlet): + """Find a user based on an external ID from an auth provider""" + + PATTERNS = admin_patterns( + "/auth_providers/(?P[^/]*)/users/(?P[^/]*)" + ) + + def __init__(self, hs: "HomeServer"): + self._auth = hs.get_auth() + self._store = hs.get_datastores().main + + async def on_GET( + self, + request: SynapseRequest, + provider: str, + external_id: str, + ) -> Tuple[int, JsonDict]: + await assert_requester_is_admin(self._auth, request) + + user_id = await self._store.get_user_by_external_id(provider, external_id) + + if user_id is None: + raise NotFoundError("User not found") + + return HTTPStatus.OK, {"user_id": user_id} diff --git a/tests/rest/admin/test_user.py b/tests/rest/admin/test_user.py index ec5ccf6fca..9f536ceeb3 100644 --- a/tests/rest/admin/test_user.py +++ b/tests/rest/admin/test_user.py @@ -4140,3 +4140,90 @@ class AccountDataTestCase(unittest.HomeserverTestCase): {"b": 2}, channel.json_body["account_data"]["rooms"]["test_room"]["m.per_room"], ) + + +class UsersByExternalIdTestCase(unittest.HomeserverTestCase): + + servlets = [ + synapse.rest.admin.register_servlets, + login.register_servlets, + ] + + def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None: + self.store = hs.get_datastores().main + + self.admin_user = self.register_user("admin", "pass", admin=True) + self.admin_user_tok = self.login("admin", "pass") + + self.other_user = self.register_user("user", "pass") + self.get_success( + self.store.record_user_external_id( + "the-auth-provider", "the-external-id", self.other_user + ) + ) + self.get_success( + self.store.record_user_external_id( + "another-auth-provider", "a:complex@external/id", self.other_user + ) + ) + + def test_no_auth(self) -> None: + """Try to lookup a user without authentication.""" + url = ( + "/_synapse/admin/v1/auth_providers/the-auth-provider/users/the-external-id" + ) + + channel = self.make_request( + "GET", + url, + ) + + self.assertEqual(401, channel.code, msg=channel.json_body) + self.assertEqual(Codes.MISSING_TOKEN, channel.json_body["errcode"]) + + def test_binding_does_not_exist(self) -> None: + """Tests that a lookup for an external ID that does not exist returns a 404""" + url = "/_synapse/admin/v1/auth_providers/the-auth-provider/users/unknown-id" + + channel = self.make_request( + "GET", + url, + access_token=self.admin_user_tok, + ) + + self.assertEqual(404, channel.code, msg=channel.json_body) + self.assertEqual(Codes.NOT_FOUND, channel.json_body["errcode"]) + + def test_success(self) -> None: + """Tests a successful external ID lookup""" + url = ( + "/_synapse/admin/v1/auth_providers/the-auth-provider/users/the-external-id" + ) + + channel = self.make_request( + "GET", + url, + access_token=self.admin_user_tok, + ) + + self.assertEqual(200, channel.code, msg=channel.json_body) + self.assertEqual( + {"user_id": self.other_user}, + channel.json_body, + ) + + def test_success_urlencoded(self) -> None: + """Tests a successful external ID lookup with an url-encoded ID""" + url = "/_synapse/admin/v1/auth_providers/another-auth-provider/users/a%3Acomplex%40external%2Fid" + + channel = self.make_request( + "GET", + url, + access_token=self.admin_user_tok, + ) + + 
self.assertEqual(200, channel.code, msg=channel.json_body) + self.assertEqual( + {"user_id": self.other_user}, + channel.json_body, + ) -- cgit 1.5.1 From d64e85197af31f5642f64ae1d86f5a0c74050fec Mon Sep 17 00:00:00 2001 From: Sean Quah <8349537+squahtx@users.noreply.github.com> Date: Fri, 16 Sep 2022 16:16:05 +0100 Subject: Remove error spam when users query the keys of departed remote users (#13826) The error message introduced in #13749 has turned out to be very spammy. Remove it for now. --- changelog.d/13826.bugfix | 1 + synapse/handlers/e2e_keys.py | 21 ++++++++++++--------- 2 files changed, 13 insertions(+), 9 deletions(-) create mode 100644 changelog.d/13826.bugfix (limited to 'synapse') diff --git a/changelog.d/13826.bugfix b/changelog.d/13826.bugfix new file mode 100644 index 0000000000..8ffafec07b --- /dev/null +++ b/changelog.d/13826.bugfix @@ -0,0 +1 @@ +Fix a long standing bug where device lists would remain cached when remote users left and rejoined the last room shared with the local homeserver. diff --git a/synapse/handlers/e2e_keys.py b/synapse/handlers/e2e_keys.py index 8eed63ccf3..09a2492afc 100644 --- a/synapse/handlers/e2e_keys.py +++ b/synapse/handlers/e2e_keys.py @@ -188,18 +188,21 @@ class E2eKeysHandler: ) invalid_cached_users = cached_users - valid_cached_users if invalid_cached_users: - # Fix up results. If we get here, there is either a bug in device - # list tracking, or we hit the race mentioned above. + # Fix up results. If we get here, it means there was either a bug in + # device list tracking, or we hit the race mentioned above. + # TODO: In practice, this path is hit fairly often in existing + # deployments when clients query the keys of departed remote + # users. A background update to mark the appropriate device + # lists as unsubscribed is needed. + # https://github.com/matrix-org/synapse/issues/13651 + # Note that this currently introduces a failure mode when clients + # are trying to decrypt old messages from a remote user whose + # homeserver is no longer available. We may want to consider falling + # back to the cached data when we fail to retrieve a device list + # over federation for such remote users. user_ids_not_in_cache.update(invalid_cached_users) for invalid_user_id in invalid_cached_users: remote_results.pop(invalid_user_id) - # This log message may be removed if it turns out it's almost - # entirely triggered by races. - logger.error( - "Devices for %s were cached, but the server no longer shares " - "any rooms with them. 
The cached device lists are stale.", - invalid_cached_users, - ) for user_id, devices in remote_results.items(): user_devices = results.setdefault(user_id, {}) -- cgit 1.5.1 From 44be42338e032a50e5fc3d6c69be4055f33cb26c Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Fri, 16 Sep 2022 10:56:56 -0500 Subject: Add support to purge rows from MSC2716 and other tables when purging a room (#13825) `event_failed_pull_attempts` added in https://github.com/matrix-org/synapse/pull/13589 MSC2716 related tables added in: - https://github.com/matrix-org/synapse/pull/10245/files#diff-3d42dfb44d02f7de3aada105e0bdc1cc9dd7f953cbf0f36c5d0f50827bf0320aR1 - Renamed in https://github.com/matrix-org/synapse/pull/10838/files#diff-2730bfbe9e688b55e46f9371aefe67dac2bd2b2b7d9d6b92774eea1fcfae156dR1 - https://github.com/matrix-org/synapse/pull/10498/files#diff-c52bbfbb5921a3f6f023b24343668479d966fac164f13b7c39d2197ce3afa7a5R1 --- changelog.d/13825.bugfix | 1 + synapse/storage/databases/main/purge_events.py | 5 +++++ synapse/storage/schema/__init__.py | 2 ++ .../delta/73/02room_id_indexes_for_purging.sql | 22 ++++++++++++++++++++++ 4 files changed, 30 insertions(+) create mode 100644 changelog.d/13825.bugfix create mode 100644 synapse/storage/schema/main/delta/73/02room_id_indexes_for_purging.sql (limited to 'synapse') diff --git a/changelog.d/13825.bugfix b/changelog.d/13825.bugfix new file mode 100644 index 0000000000..626fc6349f --- /dev/null +++ b/changelog.d/13825.bugfix @@ -0,0 +1 @@ +Delete associated data from `event_failed_pull_attempts`, `insertion_events`, `insertion_event_extremities`, `insertion_event_edges` and `batch_events` when purging the room. diff --git a/synapse/storage/databases/main/purge_events.py b/synapse/storage/databases/main/purge_events.py index f6822707e4..9213ce0b5a 100644 --- a/synapse/storage/databases/main/purge_events.py +++ b/synapse/storage/databases/main/purge_events.py @@ -419,6 +419,7 @@ class PurgeEventsStore(StateGroupWorkerStore, CacheInvalidationWorkerStore): "event_forward_extremities", "event_push_actions", "event_search", + "event_failed_pull_attempts", "partial_state_events", "events", "federation_inbound_events_staging", @@ -441,6 +442,10 @@ class PurgeEventsStore(StateGroupWorkerStore, CacheInvalidationWorkerStore): "e2e_room_keys", "event_push_summary", "pusher_throttle", + "insertion_events", + "insertion_event_extremities", + "insertion_event_edges", + "batch_events", "room_account_data", "room_tags", # "rooms" happens last, to keep the foreign keys in the other tables diff --git a/synapse/storage/schema/__init__.py b/synapse/storage/schema/__init__.py index 68e055c664..f29424d17a 100644 --- a/synapse/storage/schema/__init__.py +++ b/synapse/storage/schema/__init__.py @@ -83,6 +83,8 @@ Changes in SCHEMA_VERSION = 73; event_push_summary, receipts_linearized, and receipts_graph. - Add table `event_failed_pull_attempts` to keep track when we fail to pull events over federation. + - Add indexes to various tables (`event_failed_pull_attempts`, `insertion_events`, + `batch_events`) to make it easy to delete all associated rows when purging a room.
""" diff --git a/synapse/storage/schema/main/delta/73/02room_id_indexes_for_purging.sql b/synapse/storage/schema/main/delta/73/02room_id_indexes_for_purging.sql new file mode 100644 index 0000000000..6d38bdd430 --- /dev/null +++ b/synapse/storage/schema/main/delta/73/02room_id_indexes_for_purging.sql @@ -0,0 +1,22 @@ +/* Copyright 2022 The Matrix.org Foundation C.I.C + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +-- Add index so we can easily purge all rows from a given `room_id` +CREATE INDEX IF NOT EXISTS event_failed_pull_attempts_room_id ON event_failed_pull_attempts(room_id); + +-- MSC2716 related tables: +-- Add indexes so we can easily purge all rows from a given `room_id` +CREATE INDEX IF NOT EXISTS insertion_events_room_id ON insertion_events(room_id); +CREATE INDEX IF NOT EXISTS batch_events_room_id ON batch_events(room_id); -- cgit 1.5.1 From c802ef14119b21cfdf8f5a9c246b695c98c0f718 Mon Sep 17 00:00:00 2001 From: Denis Date: Tue, 20 Sep 2022 10:44:38 +0200 Subject: Don't include redundant prev_state in new events (#13791) --- changelog.d/13791.removal | 1 + synapse/events/builder.py | 1 - synapse/federation/federation_client.py | 3 --- 3 files changed, 1 insertion(+), 4 deletions(-) create mode 100644 changelog.d/13791.removal (limited to 'synapse') diff --git a/changelog.d/13791.removal b/changelog.d/13791.removal new file mode 100644 index 0000000000..283226b63e --- /dev/null +++ b/changelog.d/13791.removal @@ -0,0 +1 @@ +Don't include redundant `prev_state` in new events. Contributed by Denis Kariakin (@dakariakin). diff --git a/synapse/events/builder.py b/synapse/events/builder.py index 746bd3978d..e2ee10dd3d 100644 --- a/synapse/events/builder.py +++ b/synapse/events/builder.py @@ -167,7 +167,6 @@ class EventBuilder: "content": self.content, "unsigned": self.unsigned, "depth": depth, - "prev_state": [], } if self.is_state(): diff --git a/synapse/federation/federation_client.py b/synapse/federation/federation_client.py index 4a4289ee7c..464672a3da 100644 --- a/synapse/federation/federation_client.py +++ b/synapse/federation/federation_client.py @@ -906,9 +906,6 @@ class FederationClient(FederationBase): # The protoevent received over the JSON wire may not have all # the required fields. Lets just gloss over that because # there's some we never care about - if "prev_state" not in pdu_dict: - pdu_dict["prev_state"] = [] - ev = builder.create_local_event_from_event_dict( self._clock, self.hostname, -- cgit 1.5.1 From 42d261c32f13e2de7494a0ade77c1f7b646af1fe Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 20 Sep 2022 12:10:31 +0100 Subject: Port the push rule classes to Rust. 
(#13768) --- .rustfmt.toml | 1 + changelog.d/13768.misc | 1 + rust/Cargo.toml | 10 +- rust/src/lib.rs | 9 +- rust/src/push/base_rules.rs | 335 ++++++++++++++++ rust/src/push/mod.rs | 502 ++++++++++++++++++++++++ stubs/synapse/synapse_rust.pyi | 2 - stubs/synapse/synapse_rust/__init__.pyi | 2 + stubs/synapse/synapse_rust/push.pyi | 37 ++ synapse/handlers/push_rules.py | 5 +- synapse/push/baserules.py | 583 ---------------------------- synapse/push/bulk_push_rule_evaluator.py | 7 +- synapse/push/clientformat.py | 5 +- synapse/storage/databases/main/push_rule.py | 23 +- tests/handlers/test_deactivate_account.py | 27 +- 15 files changed, 932 insertions(+), 617 deletions(-) create mode 100644 .rustfmt.toml create mode 100644 changelog.d/13768.misc create mode 100644 rust/src/push/base_rules.rs create mode 100644 rust/src/push/mod.rs delete mode 100644 stubs/synapse/synapse_rust.pyi create mode 100644 stubs/synapse/synapse_rust/__init__.pyi create mode 100644 stubs/synapse/synapse_rust/push.pyi delete mode 100644 synapse/push/baserules.py (limited to 'synapse') diff --git a/.rustfmt.toml b/.rustfmt.toml new file mode 100644 index 0000000000..bf96e7743d --- /dev/null +++ b/.rustfmt.toml @@ -0,0 +1 @@ +group_imports = "StdExternalCrate" diff --git a/changelog.d/13768.misc b/changelog.d/13768.misc new file mode 100644 index 0000000000..28bddb7059 --- /dev/null +++ b/changelog.d/13768.misc @@ -0,0 +1 @@ +Port push rules to using Rust. diff --git a/rust/Cargo.toml b/rust/Cargo.toml index deddf3cec2..8dc5f93ff1 100644 --- a/rust/Cargo.toml +++ b/rust/Cargo.toml @@ -18,7 +18,15 @@ crate-type = ["cdylib"] name = "synapse.synapse_rust" [dependencies] -pyo3 = { version = "0.16.5", features = ["extension-module", "macros", "abi3", "abi3-py37"] } +anyhow = "1.0.63" +lazy_static = "1.4.0" +log = "0.4.17" +pyo3 = { version = "0.17.1", features = ["extension-module", "macros", "anyhow", "abi3", "abi3-py37"] } +pyo3-log = "0.7.0" +pythonize = "0.17.0" +regex = "1.6.0" +serde = { version = "1.0.144", features = ["derive"] } +serde_json = "1.0.85" [build-dependencies] blake2 = "0.10.4" diff --git a/rust/src/lib.rs b/rust/src/lib.rs index ba42465fb8..c7b60e58a7 100644 --- a/rust/src/lib.rs +++ b/rust/src/lib.rs @@ -1,5 +1,7 @@ use pyo3::prelude::*; +pub mod push; + /// Returns the hash of all the rust source files at the time it was compiled. /// /// Used by python to detect if the rust library is outdated. @@ -17,8 +19,13 @@ fn sum_as_string(a: usize, b: usize) -> PyResult { /// The entry point for defining the Python module. #[pymodule] -fn synapse_rust(_py: Python<'_>, m: &PyModule) -> PyResult<()> { +fn synapse_rust(py: Python<'_>, m: &PyModule) -> PyResult<()> { + pyo3_log::init(); + m.add_function(wrap_pyfunction!(sum_as_string, m)?)?; m.add_function(wrap_pyfunction!(get_rust_file_digest, m)?)?; + + push::register_module(py, m)?; + Ok(()) } diff --git a/rust/src/push/base_rules.rs b/rust/src/push/base_rules.rs new file mode 100644 index 0000000000..7c62bc4849 --- /dev/null +++ b/rust/src/push/base_rules.rs @@ -0,0 +1,335 @@ +// Copyright 2022 The Matrix.org Foundation C.I.C. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//! Contains the definitions of the "base" push rules. + +use std::borrow::Cow; +use std::collections::HashMap; + +use lazy_static::lazy_static; +use serde_json::Value; + +use super::KnownCondition; +use crate::push::Action; +use crate::push::Condition; +use crate::push::EventMatchCondition; +use crate::push::PushRule; +use crate::push::SetTweak; +use crate::push::TweakValue; + +const HIGHLIGHT_ACTION: Action = Action::SetTweak(SetTweak { + set_tweak: Cow::Borrowed("highlight"), + value: None, + other_keys: Value::Null, +}); + +const HIGHLIGHT_FALSE_ACTION: Action = Action::SetTweak(SetTweak { + set_tweak: Cow::Borrowed("highlight"), + value: Some(TweakValue::Other(Value::Bool(false))), + other_keys: Value::Null, +}); + +const SOUND_ACTION: Action = Action::SetTweak(SetTweak { + set_tweak: Cow::Borrowed("sound"), + value: Some(TweakValue::String(Cow::Borrowed("default"))), + other_keys: Value::Null, +}); + +const RING_ACTION: Action = Action::SetTweak(SetTweak { + set_tweak: Cow::Borrowed("sound"), + value: Some(TweakValue::String(Cow::Borrowed("ring"))), + other_keys: Value::Null, +}); + +pub const BASE_PREPEND_OVERRIDE_RULES: &[PushRule] = &[PushRule { + rule_id: Cow::Borrowed("global/override/.m.rule.master"), + priority_class: 5, + conditions: Cow::Borrowed(&[]), + actions: Cow::Borrowed(&[Action::DontNotify]), + default: true, + default_enabled: false, +}]; + +pub const BASE_APPEND_OVERRIDE_RULES: &[PushRule] = &[ + PushRule { + rule_id: Cow::Borrowed("global/override/.m.rule.suppress_notices"), + priority_class: 5, + conditions: Cow::Borrowed(&[Condition::Known(KnownCondition::EventMatch( + EventMatchCondition { + key: Cow::Borrowed("content.msgtype"), + pattern: Some(Cow::Borrowed("m.notice")), + pattern_type: None, + }, + ))]), + actions: Cow::Borrowed(&[Action::DontNotify]), + default: true, + default_enabled: true, + }, + PushRule { + rule_id: Cow::Borrowed("global/override/.m.rule.invite_for_me"), + priority_class: 5, + conditions: Cow::Borrowed(&[ + Condition::Known(KnownCondition::EventMatch(EventMatchCondition { + key: Cow::Borrowed("type"), + pattern: Some(Cow::Borrowed("m.room.member")), + pattern_type: None, + })), + Condition::Known(KnownCondition::EventMatch(EventMatchCondition { + key: Cow::Borrowed("content.membership"), + pattern: Some(Cow::Borrowed("invite")), + pattern_type: None, + })), + Condition::Known(KnownCondition::EventMatch(EventMatchCondition { + key: Cow::Borrowed("state_key"), + pattern: None, + pattern_type: Some(Cow::Borrowed("user_id")), + })), + ]), + actions: Cow::Borrowed(&[Action::Notify, HIGHLIGHT_FALSE_ACTION, SOUND_ACTION]), + default: true, + default_enabled: true, + }, + PushRule { + rule_id: Cow::Borrowed("global/override/.m.rule.member_event"), + priority_class: 5, + conditions: Cow::Borrowed(&[Condition::Known(KnownCondition::EventMatch( + EventMatchCondition { + key: Cow::Borrowed("type"), + pattern: Some(Cow::Borrowed("m.room.member")), + pattern_type: None, + }, + ))]), + actions: Cow::Borrowed(&[Action::DontNotify]), + default: true, + default_enabled: true, + }, + PushRule { + rule_id: 
Cow::Borrowed("global/override/.m.rule.contains_display_name"), + priority_class: 5, + conditions: Cow::Borrowed(&[Condition::Known(KnownCondition::ContainsDisplayName)]), + actions: Cow::Borrowed(&[Action::Notify, HIGHLIGHT_ACTION, SOUND_ACTION]), + default: true, + default_enabled: true, + }, + PushRule { + rule_id: Cow::Borrowed("global/override/.m.rule.roomnotif"), + priority_class: 5, + conditions: Cow::Borrowed(&[ + Condition::Known(KnownCondition::SenderNotificationPermission { + key: Cow::Borrowed("room"), + }), + Condition::Known(KnownCondition::EventMatch(EventMatchCondition { + key: Cow::Borrowed("content.body"), + pattern: Some(Cow::Borrowed("@room")), + pattern_type: None, + })), + ]), + actions: Cow::Borrowed(&[Action::Notify, HIGHLIGHT_ACTION]), + default: true, + default_enabled: true, + }, + PushRule { + rule_id: Cow::Borrowed("global/override/.m.rule.tombstone"), + priority_class: 5, + conditions: Cow::Borrowed(&[ + Condition::Known(KnownCondition::EventMatch(EventMatchCondition { + key: Cow::Borrowed("type"), + pattern: Some(Cow::Borrowed("m.room.tombstone")), + pattern_type: None, + })), + Condition::Known(KnownCondition::EventMatch(EventMatchCondition { + key: Cow::Borrowed("state_key"), + pattern: Some(Cow::Borrowed("")), + pattern_type: None, + })), + ]), + actions: Cow::Borrowed(&[Action::Notify, HIGHLIGHT_ACTION]), + default: true, + default_enabled: true, + }, + PushRule { + rule_id: Cow::Borrowed("global/override/.m.rule.reaction"), + priority_class: 5, + conditions: Cow::Borrowed(&[Condition::Known(KnownCondition::EventMatch( + EventMatchCondition { + key: Cow::Borrowed("type"), + pattern: Some(Cow::Borrowed("m.reaction")), + pattern_type: None, + }, + ))]), + actions: Cow::Borrowed(&[Action::DontNotify]), + default: true, + default_enabled: true, + }, + PushRule { + rule_id: Cow::Borrowed("global/override/.org.matrix.msc3786.rule.room.server_acl"), + priority_class: 5, + conditions: Cow::Borrowed(&[ + Condition::Known(KnownCondition::EventMatch(EventMatchCondition { + key: Cow::Borrowed("type"), + pattern: Some(Cow::Borrowed("m.room.server_acl")), + pattern_type: None, + })), + Condition::Known(KnownCondition::EventMatch(EventMatchCondition { + key: Cow::Borrowed("state_key"), + pattern: Some(Cow::Borrowed("")), + pattern_type: None, + })), + ]), + actions: Cow::Borrowed(&[]), + default: true, + default_enabled: true, + }, +]; + +pub const BASE_APPEND_CONTENT_RULES: &[PushRule] = &[PushRule { + rule_id: Cow::Borrowed("global/content/.m.rule.contains_user_name"), + priority_class: 4, + conditions: Cow::Borrowed(&[Condition::Known(KnownCondition::EventMatch( + EventMatchCondition { + key: Cow::Borrowed("content.body"), + pattern: None, + pattern_type: Some(Cow::Borrowed("user_localpart")), + }, + ))]), + actions: Cow::Borrowed(&[Action::Notify, HIGHLIGHT_ACTION, SOUND_ACTION]), + default: true, + default_enabled: true, +}]; + +pub const BASE_APPEND_UNDERRIDE_RULES: &[PushRule] = &[ + PushRule { + rule_id: Cow::Borrowed("global/underride/.m.rule.call"), + priority_class: 1, + conditions: Cow::Borrowed(&[Condition::Known(KnownCondition::EventMatch( + EventMatchCondition { + key: Cow::Borrowed("type"), + pattern: Some(Cow::Borrowed("m.call.invite")), + pattern_type: None, + }, + ))]), + actions: Cow::Borrowed(&[Action::Notify, RING_ACTION, HIGHLIGHT_FALSE_ACTION]), + default: true, + default_enabled: true, + }, + PushRule { + rule_id: Cow::Borrowed("global/underride/.m.rule.room_one_to_one"), + priority_class: 1, + conditions: Cow::Borrowed(&[ + 
Condition::Known(KnownCondition::EventMatch(EventMatchCondition { + key: Cow::Borrowed("type"), + pattern: Some(Cow::Borrowed("m.room.message")), + pattern_type: None, + })), + Condition::Known(KnownCondition::RoomMemberCount { + is: Some(Cow::Borrowed("2")), + }), + ]), + actions: Cow::Borrowed(&[Action::Notify, SOUND_ACTION, HIGHLIGHT_FALSE_ACTION]), + default: true, + default_enabled: true, + }, + PushRule { + rule_id: Cow::Borrowed("global/underride/.m.rule.encrypted_room_one_to_one"), + priority_class: 1, + conditions: Cow::Borrowed(&[ + Condition::Known(KnownCondition::EventMatch(EventMatchCondition { + key: Cow::Borrowed("type"), + pattern: Some(Cow::Borrowed("m.room.encrypted")), + pattern_type: None, + })), + Condition::Known(KnownCondition::RoomMemberCount { + is: Some(Cow::Borrowed("2")), + }), + ]), + actions: Cow::Borrowed(&[Action::Notify, SOUND_ACTION, HIGHLIGHT_FALSE_ACTION]), + default: true, + default_enabled: true, + }, + PushRule { + rule_id: Cow::Borrowed("global/underride/.org.matrix.msc3772.thread_reply"), + priority_class: 1, + conditions: Cow::Borrowed(&[Condition::Known(KnownCondition::RelationMatch { + rel_type: Cow::Borrowed("m.thread"), + sender: None, + sender_type: Some(Cow::Borrowed("user_id")), + })]), + actions: Cow::Borrowed(&[Action::Notify, HIGHLIGHT_FALSE_ACTION]), + default: true, + default_enabled: true, + }, + PushRule { + rule_id: Cow::Borrowed("global/underride/.m.rule.message"), + priority_class: 1, + conditions: Cow::Borrowed(&[Condition::Known(KnownCondition::EventMatch( + EventMatchCondition { + key: Cow::Borrowed("type"), + pattern: Some(Cow::Borrowed("m.room.message")), + pattern_type: None, + }, + ))]), + actions: Cow::Borrowed(&[Action::Notify, HIGHLIGHT_FALSE_ACTION]), + default: true, + default_enabled: true, + }, + PushRule { + rule_id: Cow::Borrowed("global/underride/.m.rule.encrypted"), + priority_class: 1, + conditions: Cow::Borrowed(&[Condition::Known(KnownCondition::EventMatch( + EventMatchCondition { + key: Cow::Borrowed("type"), + pattern: Some(Cow::Borrowed("m.room.encrypted")), + pattern_type: None, + }, + ))]), + actions: Cow::Borrowed(&[Action::Notify, HIGHLIGHT_FALSE_ACTION]), + default: true, + default_enabled: true, + }, + PushRule { + rule_id: Cow::Borrowed("global/underride/.im.vector.jitsi"), + priority_class: 1, + conditions: Cow::Borrowed(&[ + Condition::Known(KnownCondition::EventMatch(EventMatchCondition { + key: Cow::Borrowed("type"), + pattern: Some(Cow::Borrowed("im.vector.modular.widgets")), + pattern_type: None, + })), + Condition::Known(KnownCondition::EventMatch(EventMatchCondition { + key: Cow::Borrowed("content.type"), + pattern: Some(Cow::Borrowed("jitsi")), + pattern_type: None, + })), + Condition::Known(KnownCondition::EventMatch(EventMatchCondition { + key: Cow::Borrowed("state_key"), + pattern: Some(Cow::Borrowed("*")), + pattern_type: None, + })), + ]), + actions: Cow::Borrowed(&[Action::Notify, HIGHLIGHT_FALSE_ACTION]), + default: true, + default_enabled: true, + }, +]; + +lazy_static! 
{ + pub static ref BASE_RULES_BY_ID: HashMap<&'static str, &'static PushRule> = + BASE_PREPEND_OVERRIDE_RULES + .iter() + .chain(BASE_APPEND_OVERRIDE_RULES.iter()) + .chain(BASE_APPEND_CONTENT_RULES.iter()) + .chain(BASE_APPEND_UNDERRIDE_RULES.iter()) + .map(|rule| { (&*rule.rule_id, rule) }) + .collect(); +} diff --git a/rust/src/push/mod.rs b/rust/src/push/mod.rs new file mode 100644 index 0000000000..de6764e7c5 --- /dev/null +++ b/rust/src/push/mod.rs @@ -0,0 +1,502 @@ +// Copyright 2022 The Matrix.org Foundation C.I.C. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//! An implementation of Matrix push rules. +//! +//! The `Cow<_>` type is used extensively within this module to allow creating +//! the base rules as constants (in Rust constants can't require explicit +//! allocation atm). +//! +//! --- +//! +//! Push rules is the system used to determine which events trigger a push (and a +//! bump in notification counts). +//! +//! This consists of a list of "push rules" for each user, where a push rule is a +//! pair of "conditions" and "actions". When a user receives an event Synapse +//! iterates over the list of push rules until it finds one where all the conditions +//! match the event, at which point "actions" describe the outcome (e.g. notify, +//! highlight, etc). +//! +//! Push rules are split up into 5 different "kinds" (aka "priority classes"), which +//! are run in order: +//! 1. Override — highest priority rules, e.g. always ignore notices +//! 2. Content — content specific rules, e.g. @ notifications +//! 3. Room — per room rules, e.g. enable/disable notifications for all messages +//! in a room +//! 4. Sender — per sender rules, e.g. never notify for messages from a given +//! user +//! 5. Underride — the lowest priority "default" rules, e.g. notify for every +//! message. +//! +//! The set of "base rules" are the list of rules that every user has by default. A +//! user can modify their copy of the push rules in one of three ways: +//! +//! 1. Adding a new push rule of a certain kind +//! 2. Changing the actions of a base rule +//! 3. Enabling/disabling a base rule. +//! +//! The base rules are split into whether they come before or after a particular +//! kind, so the order of push rule evaluation would be: base rules for before +//! "override" kind, user defined "override" rules, base rules after "override" +//! kind, etc, etc. + +use std::borrow::Cow; +use std::collections::{BTreeMap, HashMap, HashSet}; + +use anyhow::{Context, Error}; +use log::warn; +use pyo3::prelude::*; +use pythonize::pythonize; +use serde::de::Error as _; +use serde::{Deserialize, Serialize}; +use serde_json::Value; + +mod base_rules; + +/// Called when registering modules with python. 
+pub fn register_module(py: Python<'_>, m: &PyModule) -> PyResult<()> { + let child_module = PyModule::new(py, "push")?; + child_module.add_class::()?; + child_module.add_class::()?; + child_module.add_class::()?; + child_module.add_function(wrap_pyfunction!(get_base_rule_ids, m)?)?; + + m.add_submodule(child_module)?; + + // We need to manually add the module to sys.modules to make `from + // synapse.synapse_rust import push` work. + py.import("sys")? + .getattr("modules")? + .set_item("synapse.synapse_rust.push", child_module)?; + + Ok(()) +} + +#[pyfunction] +fn get_base_rule_ids() -> HashSet<&'static str> { + base_rules::BASE_RULES_BY_ID.keys().copied().collect() +} + +/// A single push rule for a user. +#[derive(Debug, Clone)] +#[pyclass(frozen)] +pub struct PushRule { + /// A unique ID for this rule + pub rule_id: Cow<'static, str>, + /// The "kind" of push rule this is (see `PRIORITY_CLASS_MAP` in Python) + #[pyo3(get)] + pub priority_class: i32, + /// The conditions that must all match for actions to be applied + pub conditions: Cow<'static, [Condition]>, + /// The actions to apply if all conditions are met + pub actions: Cow<'static, [Action]>, + /// Whether this is a base rule + #[pyo3(get)] + pub default: bool, + /// Whether this is enabled by default + #[pyo3(get)] + pub default_enabled: bool, +} + +#[pymethods] +impl PushRule { + #[staticmethod] + pub fn from_db( + rule_id: String, + priority_class: i32, + conditions: &str, + actions: &str, + ) -> Result { + let conditions = serde_json::from_str(conditions).context("parsing conditions")?; + let actions = serde_json::from_str(actions).context("parsing actions")?; + + Ok(PushRule { + rule_id: Cow::Owned(rule_id), + priority_class, + conditions, + actions, + default: false, + default_enabled: true, + }) + } + + #[getter] + fn rule_id(&self) -> &str { + &self.rule_id + } + + #[getter] + fn actions(&self) -> Vec { + self.actions.clone().into_owned() + } + + #[getter] + fn conditions(&self) -> Vec { + self.conditions.clone().into_owned() + } + + fn __repr__(&self) -> String { + format!( + "", + self.rule_id, self.conditions, self.actions + ) + } +} + +/// The "action" Synapse should perform for a matching push rule. +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum Action { + DontNotify, + Notify, + Coalesce, + SetTweak(SetTweak), + + // An unrecognized custom action. + Unknown(Value), +} + +impl IntoPy for Action { + fn into_py(self, py: Python<'_>) -> PyObject { + // When we pass the `Action` struct to Python we want it to be converted + // to a dict. We use `pythonize`, which converts the struct using the + // `serde` serialization. + pythonize(py, &self).expect("valid action") + } +} + +/// The body of a `SetTweak` push action. +#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)] +pub struct SetTweak { + set_tweak: Cow<'static, str>, + + #[serde(skip_serializing_if = "Option::is_none")] + value: Option, + + // This picks up any other fields that may have been added by clients. + // These get added when we convert the `Action` to a python object. + #[serde(flatten)] + other_keys: Value, +} + +/// The value of a `set_tweak`. +/// +/// We need this (rather than using `TweakValue` directly) so that we can use +/// `&'static str` in the value when defining the constant base rules. 
+#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)] +#[serde(untagged)] +pub enum TweakValue { + String(Cow<'static, str>), + Other(Value), +} + +impl Serialize for Action { + fn serialize(&self, serializer: S) -> Result + where + S: serde::Serializer, + { + match self { + Action::DontNotify => serializer.serialize_str("dont_notify"), + Action::Notify => serializer.serialize_str("notify"), + Action::Coalesce => serializer.serialize_str("coalesce"), + Action::SetTweak(tweak) => tweak.serialize(serializer), + Action::Unknown(value) => value.serialize(serializer), + } + } +} + +/// Simple helper class for deserializing Action from JSON. +#[derive(Deserialize)] +#[serde(untagged)] +enum ActionDeserializeHelper { + Str(String), + SetTweak(SetTweak), + Unknown(Value), +} + +impl<'de> Deserialize<'de> for Action { + fn deserialize(deserializer: D) -> Result + where + D: serde::Deserializer<'de>, + { + let helper: ActionDeserializeHelper = Deserialize::deserialize(deserializer)?; + match helper { + ActionDeserializeHelper::Str(s) => match &*s { + "dont_notify" => Ok(Action::DontNotify), + "notify" => Ok(Action::Notify), + "coalesce" => Ok(Action::Coalesce), + _ => Err(D::Error::custom("unrecognized action")), + }, + ActionDeserializeHelper::SetTweak(set_tweak) => Ok(Action::SetTweak(set_tweak)), + ActionDeserializeHelper::Unknown(value) => Ok(Action::Unknown(value)), + } + } +} + +/// A condition used in push rules to match against an event. +/// +/// We need this split as `serde` doesn't give us the ability to have a +/// "catchall" variant in tagged enums. +#[derive(Serialize, Deserialize, Debug, Clone)] +#[serde(untagged)] +pub enum Condition { + /// A recognized condition that we can match against + Known(KnownCondition), + /// An unrecognized condition that we ignore. + Unknown(Value), +} + +/// The set of "known" conditions that we can handle. +#[derive(Serialize, Deserialize, Debug, Clone)] +#[serde(rename_all = "snake_case")] +#[serde(tag = "kind")] +pub enum KnownCondition { + EventMatch(EventMatchCondition), + ContainsDisplayName, + RoomMemberCount { + #[serde(skip_serializing_if = "Option::is_none")] + is: Option>, + }, + SenderNotificationPermission { + key: Cow<'static, str>, + }, + #[serde(rename = "org.matrix.msc3772.relation_match")] + RelationMatch { + rel_type: Cow<'static, str>, + #[serde(skip_serializing_if = "Option::is_none")] + sender: Option>, + #[serde(skip_serializing_if = "Option::is_none")] + sender_type: Option>, + }, +} + +impl IntoPy for Condition { + fn into_py(self, py: Python<'_>) -> PyObject { + pythonize(py, &self).expect("valid condition") + } +} + +/// The body of a [`Condition::EventMatch`] +#[derive(Serialize, Deserialize, Debug, Clone)] +pub struct EventMatchCondition { + key: Cow<'static, str>, + #[serde(skip_serializing_if = "Option::is_none")] + pattern: Option>, + #[serde(skip_serializing_if = "Option::is_none")] + pattern_type: Option>, +} + +/// The collection of push rules for a user. +#[derive(Debug, Clone, Default)] +#[pyclass(frozen)] +struct PushRules { + /// Custom push rules that override a base rule. + overridden_base_rules: HashMap, PushRule>, + + /// Custom rules that come between the prepend/append override base rules. + override_rules: Vec, + /// Custom rules that come before the base content rules. + content: Vec, + /// Custom rules that come before the base room rules. + room: Vec, + /// Custom rules that come before the base sender rules. + sender: Vec, + /// Custom rules that come before the base underride rules. 
+ underride: Vec, +} + +#[pymethods] +impl PushRules { + #[new] + fn new(rules: Vec) -> PushRules { + let mut push_rules: PushRules = Default::default(); + + for rule in rules { + if let Some(&o) = base_rules::BASE_RULES_BY_ID.get(&*rule.rule_id) { + push_rules.overridden_base_rules.insert( + rule.rule_id.clone(), + PushRule { + actions: rule.actions.clone(), + ..o.clone() + }, + ); + + continue; + } + + match rule.priority_class { + 5 => push_rules.override_rules.push(rule), + 4 => push_rules.content.push(rule), + 3 => push_rules.room.push(rule), + 2 => push_rules.sender.push(rule), + 1 => push_rules.underride.push(rule), + _ => { + warn!( + "Unrecognized priority class for rule {}: {}", + rule.rule_id, rule.priority_class + ); + } + } + } + + push_rules + } + + /// Returns the list of all rules, including base rules, in the order they + /// should be executed in. + fn rules(&self) -> Vec { + self.iter().cloned().collect() + } +} + +impl PushRules { + /// Iterates over all the rules, including base rules, in the order they + /// should be executed in. + pub fn iter(&self) -> impl Iterator { + base_rules::BASE_PREPEND_OVERRIDE_RULES + .iter() + .chain(self.override_rules.iter()) + .chain(base_rules::BASE_APPEND_OVERRIDE_RULES.iter()) + .chain(self.content.iter()) + .chain(base_rules::BASE_APPEND_CONTENT_RULES.iter()) + .chain(self.room.iter()) + .chain(self.sender.iter()) + .chain(self.underride.iter()) + .chain(base_rules::BASE_APPEND_UNDERRIDE_RULES.iter()) + .map(|rule| { + self.overridden_base_rules + .get(&*rule.rule_id) + .unwrap_or(rule) + }) + } +} + +/// A wrapper around `PushRules` that checks the enabled state of rules and +/// filters out disabled experimental rules. +#[derive(Debug, Clone, Default)] +#[pyclass(frozen)] +pub struct FilteredPushRules { + push_rules: PushRules, + enabled_map: BTreeMap, + msc3786_enabled: bool, + msc3772_enabled: bool, +} + +#[pymethods] +impl FilteredPushRules { + #[new] + fn py_new( + push_rules: PushRules, + enabled_map: BTreeMap, + msc3786_enabled: bool, + msc3772_enabled: bool, + ) -> Self { + Self { + push_rules, + enabled_map, + msc3786_enabled, + msc3772_enabled, + } + } + + /// Returns the list of all rules and their enabled state, including base + /// rules, in the order they should be executed in. + fn rules(&self) -> Vec<(PushRule, bool)> { + self.iter().map(|(r, e)| (r.clone(), e)).collect() + } +} + +impl FilteredPushRules { + /// Iterates over all the rules and their enabled state, including base + /// rules, in the order they should be executed in. 
+ fn iter(&self) -> impl Iterator { + self.push_rules + .iter() + .filter(|rule| { + // Ignore disabled experimental push rules + if !self.msc3786_enabled + && rule.rule_id == "global/override/.org.matrix.msc3786.rule.room.server_acl" + { + return false; + } + + if !self.msc3772_enabled + && rule.rule_id == "global/underride/.org.matrix.msc3772.thread_reply" + { + return false; + } + + true + }) + .map(|r| { + let enabled = *self + .enabled_map + .get(&*r.rule_id) + .unwrap_or(&r.default_enabled); + (r, enabled) + }) + } +} + +#[test] +fn test_serialize_condition() { + let condition = Condition::Known(KnownCondition::EventMatch(EventMatchCondition { + key: "content.body".into(), + pattern: Some("coffee".into()), + pattern_type: None, + })); + + let json = serde_json::to_string(&condition).unwrap(); + assert_eq!( + json, + r#"{"kind":"event_match","key":"content.body","pattern":"coffee"}"# + ) +} + +#[test] +fn test_deserialize_condition() { + let json = r#"{"kind":"event_match","key":"content.body","pattern":"coffee"}"#; + + let _: Condition = serde_json::from_str(json).unwrap(); +} + +#[test] +fn test_deserialize_custom_condition() { + let json = r#"{"kind":"custom_tag"}"#; + + let condition: Condition = serde_json::from_str(json).unwrap(); + assert!(matches!(condition, Condition::Unknown(_))); + + let new_json = serde_json::to_string(&condition).unwrap(); + assert_eq!(json, new_json); +} + +#[test] +fn test_deserialize_action() { + let _: Action = serde_json::from_str(r#""notify""#).unwrap(); + let _: Action = serde_json::from_str(r#""dont_notify""#).unwrap(); + let _: Action = serde_json::from_str(r#""coalesce""#).unwrap(); + let _: Action = serde_json::from_str(r#"{"set_tweak": "highlight"}"#).unwrap(); +} + +#[test] +fn test_custom_action() { + let json = r#"{"some_custom":"action_fields"}"#; + + let action: Action = serde_json::from_str(json).unwrap(); + assert!(matches!(action, Action::Unknown(_))); + + let new_json = serde_json::to_string(&action).unwrap(); + assert_eq!(json, new_json); +} diff --git a/stubs/synapse/synapse_rust.pyi b/stubs/synapse/synapse_rust.pyi deleted file mode 100644 index 8658d3138f..0000000000 --- a/stubs/synapse/synapse_rust.pyi +++ /dev/null @@ -1,2 +0,0 @@ -def sum_as_string(a: int, b: int) -> str: ... -def get_rust_file_digest() -> str: ... diff --git a/stubs/synapse/synapse_rust/__init__.pyi b/stubs/synapse/synapse_rust/__init__.pyi new file mode 100644 index 0000000000..8658d3138f --- /dev/null +++ b/stubs/synapse/synapse_rust/__init__.pyi @@ -0,0 +1,2 @@ +def sum_as_string(a: int, b: int) -> str: ... +def get_rust_file_digest() -> str: ... diff --git a/stubs/synapse/synapse_rust/push.pyi b/stubs/synapse/synapse_rust/push.pyi new file mode 100644 index 0000000000..93c4e69d42 --- /dev/null +++ b/stubs/synapse/synapse_rust/push.pyi @@ -0,0 +1,37 @@ +from typing import Any, Collection, Dict, Mapping, Sequence, Tuple, Union + +from synapse.types import JsonDict + +class PushRule: + @property + def rule_id(self) -> str: ... + @property + def priority_class(self) -> int: ... + @property + def conditions(self) -> Sequence[Mapping[str, str]]: ... + @property + def actions(self) -> Sequence[Union[Mapping[str, Any], str]]: ... + @property + def default(self) -> bool: ... + @property + def default_enabled(self) -> bool: ... + @staticmethod + def from_db( + rule_id: str, priority_class: int, conditions: str, actions: str + ) -> "PushRule": ... + +class PushRules: + def __init__(self, rules: Collection[PushRule]): ... + def rules(self) -> Collection[PushRule]: ... 
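A minimal sketch of how these classes are driven from Python, modelled on the `_load_rules` changes later in this patch; the rule ID, condition and action values below are invented for illustration:

```python
from synapse.synapse_rust.push import FilteredPushRules, PushRule, PushRules

# Conditions and actions are passed as JSON strings, in the same form as they
# are stored in the database.
rule = PushRule.from_db(
    rule_id="global/override/com.example.rule1",
    priority_class=5,
    conditions='[{"kind": "event_match", "key": "content.msgtype", "pattern": "m.notice"}]',
    actions='["dont_notify"]',
)

# Wrap the custom rules, then combine them with the per-rule enabled map and
# the experimental-feature flags to get the filtered, ordered view.
push_rules = PushRules([rule])
filtered = FilteredPushRules(
    push_rules,
    {},
    msc3786_enabled=False,
    msc3772_enabled=False,
)

for push_rule, enabled in filtered.rules():
    print(push_rule.rule_id, push_rule.priority_class, enabled)
```

`rules()` returns `(rule, enabled)` pairs in evaluation order, including the base rules, which is how the bulk push rule evaluator and the client-format code changed later in this patch consume them.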
+ +class FilteredPushRules: + def __init__( + self, + push_rules: PushRules, + enabled_map: Dict[str, bool], + msc3786_enabled: bool, + msc3772_enabled: bool, + ): ... + def rules(self) -> Collection[Tuple[PushRule, bool]]: ... + +def get_base_rule_ids() -> Collection[str]: ... diff --git a/synapse/handlers/push_rules.py b/synapse/handlers/push_rules.py index 2599160bcc..1219672a59 100644 --- a/synapse/handlers/push_rules.py +++ b/synapse/handlers/push_rules.py @@ -16,14 +16,17 @@ from typing import TYPE_CHECKING, List, Optional, Union import attr from synapse.api.errors import SynapseError, UnrecognizedRequestError -from synapse.push.baserules import BASE_RULE_IDS from synapse.storage.push_rule import RuleNotFoundException +from synapse.synapse_rust.push import get_base_rule_ids from synapse.types import JsonDict if TYPE_CHECKING: from synapse.server import HomeServer +BASE_RULE_IDS = get_base_rule_ids() + + @attr.s(slots=True, frozen=True, auto_attribs=True) class RuleSpec: scope: str diff --git a/synapse/push/baserules.py b/synapse/push/baserules.py deleted file mode 100644 index 440205e80c..0000000000 --- a/synapse/push/baserules.py +++ /dev/null @@ -1,583 +0,0 @@ -# Copyright 2015, 2016 OpenMarket Ltd -# Copyright 2017 New Vector Ltd -# Copyright 2019 The Matrix.org Foundation C.I.C. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -""" -Push rules is the system used to determine which events trigger a push (and a -bump in notification counts). - -This consists of a list of "push rules" for each user, where a push rule is a -pair of "conditions" and "actions". When a user receives an event Synapse -iterates over the list of push rules until it finds one where all the conditions -match the event, at which point "actions" describe the outcome (e.g. notify, -highlight, etc). - -Push rules are split up into 5 different "kinds" (aka "priority classes"), which -are run in order: - 1. Override — highest priority rules, e.g. always ignore notices - 2. Content — content specific rules, e.g. @ notifications - 3. Room — per room rules, e.g. enable/disable notifications for all messages - in a room - 4. Sender — per sender rules, e.g. never notify for messages from a given - user - 5. Underride — the lowest priority "default" rules, e.g. notify for every - message. - -The set of "base rules" are the list of rules that every user has by default. A -user can modify their copy of the push rules in one of three ways: - - 1. Adding a new push rule of a certain kind - 2. Changing the actions of a base rule - 3. Enabling/disabling a base rule. - -The base rules are split into whether they come before or after a particular -kind, so the order of push rule evaluation would be: base rules for before -"override" kind, user defined "override" rules, base rules after "override" -kind, etc, etc. 
-""" - -import itertools -import logging -from typing import Dict, Iterator, List, Mapping, Sequence, Tuple, Union - -import attr - -from synapse.config.experimental import ExperimentalConfig -from synapse.push.rulekinds import PRIORITY_CLASS_MAP - -logger = logging.getLogger(__name__) - - -@attr.s(auto_attribs=True, slots=True, frozen=True) -class PushRule: - """A push rule - - Attributes: - rule_id: a unique ID for this rule - priority_class: what "kind" of push rule this is (see - `PRIORITY_CLASS_MAP` for mapping between int and kind) - conditions: the sequence of conditions that all need to match - actions: the actions to apply if all conditions are met - default: is this a base rule? - default_enabled: is this enabled by default? - """ - - rule_id: str - priority_class: int - conditions: Sequence[Mapping[str, str]] - actions: Sequence[Union[str, Mapping]] - default: bool = False - default_enabled: bool = True - - -@attr.s(auto_attribs=True, slots=True, frozen=True, weakref_slot=False) -class PushRules: - """A collection of push rules for an account. - - Can be iterated over, producing push rules in priority order. - """ - - # A mapping from rule ID to push rule that overrides a base rule. These will - # be returned instead of the base rule. - overriden_base_rules: Dict[str, PushRule] = attr.Factory(dict) - - # The following stores the custom push rules at each priority class. - # - # We keep these separate (rather than combining into one big list) to avoid - # copying the base rules around all the time. - override: List[PushRule] = attr.Factory(list) - content: List[PushRule] = attr.Factory(list) - room: List[PushRule] = attr.Factory(list) - sender: List[PushRule] = attr.Factory(list) - underride: List[PushRule] = attr.Factory(list) - - def __iter__(self) -> Iterator[PushRule]: - # When iterating over the push rules we need to return the base rules - # interspersed at the correct spots. - for rule in itertools.chain( - BASE_PREPEND_OVERRIDE_RULES, - self.override, - BASE_APPEND_OVERRIDE_RULES, - self.content, - BASE_APPEND_CONTENT_RULES, - self.room, - self.sender, - self.underride, - BASE_APPEND_UNDERRIDE_RULES, - ): - # Check if a base rule has been overriden by a custom rule. If so - # return that instead. - override_rule = self.overriden_base_rules.get(rule.rule_id) - if override_rule: - yield override_rule - else: - yield rule - - def __len__(self) -> int: - # The length is mostly used by caches to get a sense of "size" / amount - # of memory this object is using, so we only count the number of custom - # rules. - return ( - len(self.overriden_base_rules) - + len(self.override) - + len(self.content) - + len(self.room) - + len(self.sender) - + len(self.underride) - ) - - -@attr.s(auto_attribs=True, slots=True, frozen=True, weakref_slot=False) -class FilteredPushRules: - """A wrapper around `PushRules` that filters out disabled experimental push - rules, and includes the "enabled" state for each rule when iterated over. 
- """ - - push_rules: PushRules - enabled_map: Dict[str, bool] - experimental_config: ExperimentalConfig - - def __iter__(self) -> Iterator[Tuple[PushRule, bool]]: - for rule in self.push_rules: - if not _is_experimental_rule_enabled( - rule.rule_id, self.experimental_config - ): - continue - - enabled = self.enabled_map.get(rule.rule_id, rule.default_enabled) - - yield rule, enabled - - def __len__(self) -> int: - return len(self.push_rules) - - -DEFAULT_EMPTY_PUSH_RULES = PushRules() - - -def compile_push_rules(rawrules: List[PushRule]) -> PushRules: - """Given a set of custom push rules return a `PushRules` instance (which - includes the base rules). - """ - - if not rawrules: - # Fast path to avoid allocating empty lists when there are no custom - # rules for the user. - return DEFAULT_EMPTY_PUSH_RULES - - rules = PushRules() - - for rule in rawrules: - # We need to decide which bucket each custom push rule goes into. - - # If it has the same ID as a base rule then it overrides that... - overriden_base_rule = BASE_RULES_BY_ID.get(rule.rule_id) - if overriden_base_rule: - rules.overriden_base_rules[rule.rule_id] = attr.evolve( - overriden_base_rule, actions=rule.actions - ) - continue - - # ... otherwise it gets added to the appropriate priority class bucket - collection: List[PushRule] - if rule.priority_class == 5: - collection = rules.override - elif rule.priority_class == 4: - collection = rules.content - elif rule.priority_class == 3: - collection = rules.room - elif rule.priority_class == 2: - collection = rules.sender - elif rule.priority_class == 1: - collection = rules.underride - elif rule.priority_class <= 0: - logger.info( - "Got rule with priority class less than zero, but doesn't override a base rule: %s", - rule, - ) - continue - else: - # We log and continue here so as not to break event sending - logger.error("Unknown priority class: %", rule.priority_class) - continue - - collection.append(rule) - - return rules - - -def _is_experimental_rule_enabled( - rule_id: str, experimental_config: ExperimentalConfig -) -> bool: - """Used by `FilteredPushRules` to filter out experimental rules when they - have not been enabled. - """ - if ( - rule_id == "global/override/.org.matrix.msc3786.rule.room.server_acl" - and not experimental_config.msc3786_enabled - ): - return False - if ( - rule_id == "global/underride/.org.matrix.msc3772.thread_reply" - and not experimental_config.msc3772_enabled - ): - return False - return True - - -BASE_APPEND_CONTENT_RULES = [ - PushRule( - default=True, - priority_class=PRIORITY_CLASS_MAP["content"], - rule_id="global/content/.m.rule.contains_user_name", - conditions=[ - { - "kind": "event_match", - "key": "content.body", - # Match the localpart of the requester's MXID. - "pattern_type": "user_localpart", - } - ], - actions=[ - "notify", - {"set_tweak": "sound", "value": "default"}, - {"set_tweak": "highlight"}, - ], - ) -] - - -BASE_PREPEND_OVERRIDE_RULES = [ - PushRule( - default=True, - priority_class=PRIORITY_CLASS_MAP["override"], - rule_id="global/override/.m.rule.master", - default_enabled=False, - conditions=[], - actions=["dont_notify"], - ) -] - - -BASE_APPEND_OVERRIDE_RULES = [ - PushRule( - default=True, - priority_class=PRIORITY_CLASS_MAP["override"], - rule_id="global/override/.m.rule.suppress_notices", - conditions=[ - { - "kind": "event_match", - "key": "content.msgtype", - "pattern": "m.notice", - "_cache_key": "_suppress_notices", - } - ], - actions=["dont_notify"], - ), - # NB. 
.m.rule.invite_for_me must be higher prio than .m.rule.member_event - # otherwise invites will be matched by .m.rule.member_event - PushRule( - default=True, - priority_class=PRIORITY_CLASS_MAP["override"], - rule_id="global/override/.m.rule.invite_for_me", - conditions=[ - { - "kind": "event_match", - "key": "type", - "pattern": "m.room.member", - "_cache_key": "_member", - }, - { - "kind": "event_match", - "key": "content.membership", - "pattern": "invite", - "_cache_key": "_invite_member", - }, - # Match the requester's MXID. - {"kind": "event_match", "key": "state_key", "pattern_type": "user_id"}, - ], - actions=[ - "notify", - {"set_tweak": "sound", "value": "default"}, - {"set_tweak": "highlight", "value": False}, - ], - ), - # Will we sometimes want to know about people joining and leaving? - # Perhaps: if so, this could be expanded upon. Seems the most usual case - # is that we don't though. We add this override rule so that even if - # the room rule is set to notify, we don't get notifications about - # join/leave/avatar/displayname events. - # See also: https://matrix.org/jira/browse/SYN-607 - PushRule( - default=True, - priority_class=PRIORITY_CLASS_MAP["override"], - rule_id="global/override/.m.rule.member_event", - conditions=[ - { - "kind": "event_match", - "key": "type", - "pattern": "m.room.member", - "_cache_key": "_member", - } - ], - actions=["dont_notify"], - ), - # This was changed from underride to override so it's closer in priority - # to the content rules where the user name highlight rule lives. This - # way a room rule is lower priority than both but a custom override rule - # is higher priority than both. - PushRule( - default=True, - priority_class=PRIORITY_CLASS_MAP["override"], - rule_id="global/override/.m.rule.contains_display_name", - conditions=[{"kind": "contains_display_name"}], - actions=[ - "notify", - {"set_tweak": "sound", "value": "default"}, - {"set_tweak": "highlight"}, - ], - ), - PushRule( - default=True, - priority_class=PRIORITY_CLASS_MAP["override"], - rule_id="global/override/.m.rule.roomnotif", - conditions=[ - { - "kind": "event_match", - "key": "content.body", - "pattern": "@room", - "_cache_key": "_roomnotif_content", - }, - { - "kind": "sender_notification_permission", - "key": "room", - "_cache_key": "_roomnotif_pl", - }, - ], - actions=["notify", {"set_tweak": "highlight", "value": True}], - ), - PushRule( - default=True, - priority_class=PRIORITY_CLASS_MAP["override"], - rule_id="global/override/.m.rule.tombstone", - conditions=[ - { - "kind": "event_match", - "key": "type", - "pattern": "m.room.tombstone", - "_cache_key": "_tombstone", - }, - { - "kind": "event_match", - "key": "state_key", - "pattern": "", - "_cache_key": "_tombstone_statekey", - }, - ], - actions=["notify", {"set_tweak": "highlight", "value": True}], - ), - PushRule( - default=True, - priority_class=PRIORITY_CLASS_MAP["override"], - rule_id="global/override/.m.rule.reaction", - conditions=[ - { - "kind": "event_match", - "key": "type", - "pattern": "m.reaction", - "_cache_key": "_reaction", - } - ], - actions=["dont_notify"], - ), - # XXX: This is an experimental rule that is only enabled if msc3786_enabled - # is enabled, if it is not the rule gets filtered out in _load_rules() in - # PushRulesWorkerStore - PushRule( - default=True, - priority_class=PRIORITY_CLASS_MAP["override"], - rule_id="global/override/.org.matrix.msc3786.rule.room.server_acl", - conditions=[ - { - "kind": "event_match", - "key": "type", - "pattern": "m.room.server_acl", - "_cache_key": 
"_room_server_acl", - }, - { - "kind": "event_match", - "key": "state_key", - "pattern": "", - "_cache_key": "_room_server_acl_state_key", - }, - ], - actions=[], - ), -] - - -BASE_APPEND_UNDERRIDE_RULES = [ - PushRule( - default=True, - priority_class=PRIORITY_CLASS_MAP["underride"], - rule_id="global/underride/.m.rule.call", - conditions=[ - { - "kind": "event_match", - "key": "type", - "pattern": "m.call.invite", - "_cache_key": "_call", - } - ], - actions=[ - "notify", - {"set_tweak": "sound", "value": "ring"}, - {"set_tweak": "highlight", "value": False}, - ], - ), - # XXX: once m.direct is standardised everywhere, we should use it to detect - # a DM from the user's perspective rather than this heuristic. - PushRule( - default=True, - priority_class=PRIORITY_CLASS_MAP["underride"], - rule_id="global/underride/.m.rule.room_one_to_one", - conditions=[ - {"kind": "room_member_count", "is": "2", "_cache_key": "member_count"}, - { - "kind": "event_match", - "key": "type", - "pattern": "m.room.message", - "_cache_key": "_message", - }, - ], - actions=[ - "notify", - {"set_tweak": "sound", "value": "default"}, - {"set_tweak": "highlight", "value": False}, - ], - ), - # XXX: this is going to fire for events which aren't m.room.messages - # but are encrypted (e.g. m.call.*)... - PushRule( - default=True, - priority_class=PRIORITY_CLASS_MAP["underride"], - rule_id="global/underride/.m.rule.encrypted_room_one_to_one", - conditions=[ - {"kind": "room_member_count", "is": "2", "_cache_key": "member_count"}, - { - "kind": "event_match", - "key": "type", - "pattern": "m.room.encrypted", - "_cache_key": "_encrypted", - }, - ], - actions=[ - "notify", - {"set_tweak": "sound", "value": "default"}, - {"set_tweak": "highlight", "value": False}, - ], - ), - PushRule( - default=True, - priority_class=PRIORITY_CLASS_MAP["underride"], - rule_id="global/underride/.org.matrix.msc3772.thread_reply", - conditions=[ - { - "kind": "org.matrix.msc3772.relation_match", - "rel_type": "m.thread", - # Match the requester's MXID. - "sender_type": "user_id", - } - ], - actions=["notify", {"set_tweak": "highlight", "value": False}], - ), - PushRule( - default=True, - priority_class=PRIORITY_CLASS_MAP["underride"], - rule_id="global/underride/.m.rule.message", - conditions=[ - { - "kind": "event_match", - "key": "type", - "pattern": "m.room.message", - "_cache_key": "_message", - } - ], - actions=["notify", {"set_tweak": "highlight", "value": False}], - ), - # XXX: this is going to fire for events which aren't m.room.messages - # but are encrypted (e.g. m.call.*)... 
- PushRule( - default=True, - priority_class=PRIORITY_CLASS_MAP["underride"], - rule_id="global/underride/.m.rule.encrypted", - conditions=[ - { - "kind": "event_match", - "key": "type", - "pattern": "m.room.encrypted", - "_cache_key": "_encrypted", - } - ], - actions=["notify", {"set_tweak": "highlight", "value": False}], - ), - PushRule( - default=True, - priority_class=PRIORITY_CLASS_MAP["underride"], - rule_id="global/underride/.im.vector.jitsi", - conditions=[ - { - "kind": "event_match", - "key": "type", - "pattern": "im.vector.modular.widgets", - "_cache_key": "_type_modular_widgets", - }, - { - "kind": "event_match", - "key": "content.type", - "pattern": "jitsi", - "_cache_key": "_content_type_jitsi", - }, - { - "kind": "event_match", - "key": "state_key", - "pattern": "*", - "_cache_key": "_is_state_event", - }, - ], - actions=["notify", {"set_tweak": "highlight", "value": False}], - ), -] - - -BASE_RULE_IDS = set() - -BASE_RULES_BY_ID: Dict[str, PushRule] = {} - -for r in BASE_APPEND_CONTENT_RULES: - BASE_RULE_IDS.add(r.rule_id) - BASE_RULES_BY_ID[r.rule_id] = r - -for r in BASE_PREPEND_OVERRIDE_RULES: - BASE_RULE_IDS.add(r.rule_id) - BASE_RULES_BY_ID[r.rule_id] = r - -for r in BASE_APPEND_OVERRIDE_RULES: - BASE_RULE_IDS.add(r.rule_id) - BASE_RULES_BY_ID[r.rule_id] = r - -for r in BASE_APPEND_UNDERRIDE_RULES: - BASE_RULE_IDS.add(r.rule_id) - BASE_RULES_BY_ID[r.rule_id] = r diff --git a/synapse/push/bulk_push_rule_evaluator.py b/synapse/push/bulk_push_rule_evaluator.py index 3846fbc5f0..404379ef67 100644 --- a/synapse/push/bulk_push_rule_evaluator.py +++ b/synapse/push/bulk_push_rule_evaluator.py @@ -37,11 +37,11 @@ from synapse.events.snapshot import EventContext from synapse.state import POWER_KEY from synapse.storage.databases.main.roommember import EventIdMembership from synapse.storage.state import StateFilter +from synapse.synapse_rust.push import FilteredPushRules, PushRule from synapse.util.caches import register_cache from synapse.util.metrics import measure_func from synapse.visibility import filter_event_for_clients_with_state -from .baserules import FilteredPushRules, PushRule from .push_rule_evaluator import PushRuleEvaluatorForEvent if TYPE_CHECKING: @@ -280,7 +280,8 @@ class BulkPushRuleEvaluator: thread_id = "main" if relation: relations = await self._get_mutual_relations( - relation.parent_id, itertools.chain(*rules_by_user.values()) + relation.parent_id, + itertools.chain(*(r.rules() for r in rules_by_user.values())), ) if relation.rel_type == RelationTypes.THREAD: thread_id = relation.parent_id @@ -333,7 +334,7 @@ class BulkPushRuleEvaluator: # current user, it'll be added to the dict later. 
             actions_by_user[uid] = []

-            for rule, enabled in rules:
+            for rule, enabled in rules.rules():
                 if not enabled:
                     continue

diff --git a/synapse/push/clientformat.py b/synapse/push/clientformat.py
index 73618d9234..ebc13beda1 100644
--- a/synapse/push/clientformat.py
+++ b/synapse/push/clientformat.py
@@ -16,10 +16,9 @@ import copy
 from typing import Any, Dict, List, Optional

 from synapse.push.rulekinds import PRIORITY_CLASS_INVERSE_MAP, PRIORITY_CLASS_MAP
+from synapse.synapse_rust.push import FilteredPushRules, PushRule
 from synapse.types import UserID

-from .baserules import FilteredPushRules, PushRule
-

 def format_push_rules_for_user(
     user: UserID, ruleslist: FilteredPushRules
@@ -34,7 +33,7 @@ def format_push_rules_for_user(
     rules["global"] = _add_empty_priority_class_arrays(rules["global"])

-    for r, enabled in ruleslist:
+    for r, enabled in ruleslist.rules():
         template_name = _priority_class_to_template_name(r.priority_class)

         rulearray = rules["global"][template_name]
diff --git a/synapse/storage/databases/main/push_rule.py b/synapse/storage/databases/main/push_rule.py
index 5079edd1e0..ed17b2e70c 100644
--- a/synapse/storage/databases/main/push_rule.py
+++ b/synapse/storage/databases/main/push_rule.py
@@ -30,9 +30,8 @@ from typing import (
 from synapse.api.errors import StoreError
 from synapse.config.homeserver import ExperimentalConfig
-from synapse.push.baserules import FilteredPushRules, PushRule, compile_push_rules
 from synapse.replication.slave.storage._slaved_id_tracker import SlavedIdTracker
-from synapse.storage._base import SQLBaseStore, db_to_json
+from synapse.storage._base import SQLBaseStore
 from synapse.storage.database import (
     DatabasePool,
     LoggingDatabaseConnection,
@@ -51,6 +50,7 @@ from synapse.storage.util.id_generators import (
     IdGenerator,
     StreamIdGenerator,
 )
+from synapse.synapse_rust.push import FilteredPushRules, PushRule, PushRules
 from synapse.types import JsonDict
 from synapse.util import json_encoder
 from synapse.util.caches.descriptors import cached, cachedList
@@ -72,18 +72,25 @@ def _load_rules(
     """
     ruleslist = [
-        PushRule(
+        PushRule.from_db(
             rule_id=rawrule["rule_id"],
             priority_class=rawrule["priority_class"],
-            conditions=db_to_json(rawrule["conditions"]),
-            actions=db_to_json(rawrule["actions"]),
+            conditions=rawrule["conditions"],
+            actions=rawrule["actions"],
         )
         for rawrule in rawrules
     ]

-    push_rules = compile_push_rules(ruleslist)
+    push_rules = PushRules(
+        ruleslist,
+    )

-    filtered_rules = FilteredPushRules(push_rules, enabled_map, experimental_config)
+    filtered_rules = FilteredPushRules(
+        push_rules,
+        enabled_map,
+        msc3786_enabled=experimental_config.msc3786_enabled,
+        msc3772_enabled=experimental_config.msc3772_enabled,
+    )

     return filtered_rules

@@ -845,7 +852,7 @@ class PushRuleStore(PushRulesWorkerStore):
         user_push_rules = await self.get_push_rules_for_user(user_id)

         # Get rules relating to the old room and copy them to the new room
-        for rule, enabled in user_push_rules:
+        for rule, enabled in user_push_rules.rules():
             if not enabled:
                 continue

diff --git a/tests/handlers/test_deactivate_account.py b/tests/handlers/test_deactivate_account.py
index 7b9b711521..bce65fab7d 100644
--- a/tests/handlers/test_deactivate_account.py
+++ b/tests/handlers/test_deactivate_account.py
@@ -15,11 +15,11 @@ from twisted.test.proto_helpers import MemoryReactor

 from synapse.api.constants import AccountDataTypes
-from synapse.push.baserules import PushRule
 from synapse.push.rulekinds import PRIORITY_CLASS_MAP
 from synapse.rest import admin
 from synapse.rest.client import account, login
 from synapse.server import HomeServer
+from synapse.synapse_rust.push import PushRule
 from synapse.util import Clock

 from tests.unittest import HomeserverTestCase

@@ -161,20 +161,15 @@ class DeactivateAccountTestCase(HomeserverTestCase):
             self._store.get_push_rules_for_user(self.user)
         )
         # Filter out default rules; we don't care
-        push_rules = [r for r, _ in filtered_push_rules if self._is_custom_rule(r)]
+        push_rules = [
+            r for r, _ in filtered_push_rules.rules() if self._is_custom_rule(r)
+        ]
         # Check our rule made it
-        self.assertEqual(
-            push_rules,
-            [
-                PushRule(
-                    rule_id="personal.override.rule1",
-                    priority_class=5,
-                    conditions=[],
-                    actions=[],
-                )
-            ],
-            push_rules,
-        )
+        self.assertEqual(len(push_rules), 1)
+        self.assertEqual(push_rules[0].rule_id, "personal.override.rule1")
+        self.assertEqual(push_rules[0].priority_class, 5)
+        self.assertEqual(push_rules[0].conditions, [])
+        self.assertEqual(push_rules[0].actions, [])

         # Request the deactivation of our account
         self._deactivate_my_account()
@@ -183,7 +178,9 @@ class DeactivateAccountTestCase(HomeserverTestCase):
             self._store.get_push_rules_for_user(self.user)
         )
         # Filter out default rules; we don't care
-        push_rules = [r for r, _ in filtered_push_rules if self._is_custom_rule(r)]
+        push_rules = [
+            r for r, _ in filtered_push_rules.rules() if self._is_custom_rule(r)
+        ]
         # Check our rule no longer exists
         self.assertEqual(push_rules, [], push_rules)
-- cgit 1.5.1

From fff9b955fa39bda2cca1fa726b561c7886e746a1 Mon Sep 17 00:00:00 2001
From: David Robertson
Date: Tue, 20 Sep 2022 14:14:12 +0100
Subject: Generate separate snapshots for logical databases (#13792)

* Generate separate snapshots for sqlite, postgres and common
* Cleanup postgres dbs in the TRAP
* Say which logical DB we're applying updates to
* Run background updates on the state DB
* Add new option for accepting a SCHEMA_NUMBER
---
 changelog.d/13792.misc                      |   1 +
 scripts-dev/make_full_schema.sh             | 166 +++++++++++++++++++++-------
 synapse/_scripts/update_synapse_database.py |  14 ++-
 synapse/storage/background_updates.py       |   5 +-
 4 files changed, 140 insertions(+), 46 deletions(-)
 create mode 100644 changelog.d/13792.misc

(limited to 'synapse')

diff --git a/changelog.d/13792.misc b/changelog.d/13792.misc
new file mode 100644
index 0000000000..36ac91400a
--- /dev/null
+++ b/changelog.d/13792.misc
@@ -0,0 +1 @@
+Update the script which makes full schema dumps.
diff --git a/scripts-dev/make_full_schema.sh b/scripts-dev/make_full_schema.sh
index 61394360ce..d8cd06ee4f 100755
--- a/scripts-dev/make_full_schema.sh
+++ b/scripts-dev/make_full_schema.sh
@@ -2,23 +2,16 @@
 #
 # This script generates SQL files for creating a brand new Synapse DB with the latest
 # schema, on both SQLite3 and Postgres.
-#
-# It does so by having Synapse generate an up-to-date SQLite DB, then running
-# synapse_port_db to convert it to Postgres. It then dumps the contents of both.
 export PGHOST="localhost"
-POSTGRES_DB_NAME="synapse_full_schema.$$"
-
-SQLITE_SCHEMA_FILE="schema.sql.sqlite"
-SQLITE_ROWS_FILE="rows.sql.sqlite"
-POSTGRES_SCHEMA_FILE="full.sql.postgres"
-POSTGRES_ROWS_FILE="rows.sql.postgres"
-
+POSTGRES_MAIN_DB_NAME="synapse_full_schema_main.$$"
+POSTGRES_COMMON_DB_NAME="synapse_full_schema_common.$$"
+POSTGRES_STATE_DB_NAME="synapse_full_schema_state.$$"

 REQUIRED_DEPS=("matrix-synapse" "psycopg2")

 usage() {
   echo
-  echo "Usage: $0 -p <postgres_username> -o <path> [-c] [-n] [-h]"
+  echo "Usage: $0 -p <postgres_username> -o <path> [-c] [-n <schema number>] [-h]"
   echo
   echo "-p <postgres_username>"
   echo "  Username to connect to local postgres instance. The password will be requested"
@@ -27,11 +20,16 @@ usage() {
   echo "  CI mode. Prints every command that the script runs."
   echo "-o <path>"
   echo "  Directory to output full schema files to."
+  echo "-n <schema number>"
+  echo "  Schema number for the new snapshot. Used to set the location of files within "
+  echo "  the output directory, mimicking that of synapse/storage/schemas."
+  echo "  Defaults to 9999."
   echo "-h"
   echo "  Display this help text."
 }

-while getopts "p:co:h" opt; do
+SCHEMA_NUMBER="9999"
+while getopts "p:co:hn:" opt; do
   case $opt in
     p)
       export PGUSER=$OPTARG
@@ -48,6 +46,9 @@ while getopts "p:co:h" opt; do
       usage
       exit
       ;;
+    n)
+      SCHEMA_NUMBER="$OPTARG"
+      ;;
     \?)
       echo "ERROR: Invalid option: -$OPTARG" >&2
       usage
@@ -95,12 +96,21 @@ cd "$(dirname "$0")/.."
 TMPDIR=$(mktemp -d)
 KEY_FILE=$TMPDIR/test.signing.key # default Synapse signing key path
 SQLITE_CONFIG=$TMPDIR/sqlite.conf
-SQLITE_DB=$TMPDIR/homeserver.db
+SQLITE_MAIN_DB=$TMPDIR/main.db
+SQLITE_STATE_DB=$TMPDIR/state.db
+SQLITE_COMMON_DB=$TMPDIR/common.db
 POSTGRES_CONFIG=$TMPDIR/postgres.conf

 # Ensure these files are delete on script exit
-# TODO: the trap should also drop the temp postgres DB
-trap 'rm -rf $TMPDIR' EXIT
+cleanup() {
+  echo "Cleaning up temporary sqlite database and config files..."
+  rm -r "$TMPDIR"
+  echo "Cleaning up temporary Postgres database..."
+  dropdb --if-exists "$POSTGRES_COMMON_DB_NAME"
+  dropdb --if-exists "$POSTGRES_MAIN_DB_NAME"
+  dropdb --if-exists "$POSTGRES_STATE_DB_NAME"
+}
+trap 'cleanup' EXIT

 cat > "$SQLITE_CONFIG" < "$OUTPUT_DIR/$SQLITE_SCHEMA_FILE"
-sqlite3 "$SQLITE_DB" ".dump --data-only --nosys" > "$OUTPUT_DIR/$SQLITE_ROWS_FILE"
+echo "Dumping SQLite3 schema..."
+
+mkdir -p "$OUTPUT_DIR/"{common,main,state}"/full_schema/$SCHEMA_NUMBER"
+sqlite3 "$SQLITE_COMMON_DB" ".schema --indent" > "$OUTPUT_DIR/common/full_schema/$SCHEMA_NUMBER/full.sql.sqlite"
+sqlite3 "$SQLITE_COMMON_DB" ".dump --data-only --nosys" >> "$OUTPUT_DIR/common/full_schema/$SCHEMA_NUMBER/full.sql.sqlite"
+sqlite3 "$SQLITE_MAIN_DB" ".schema --indent" > "$OUTPUT_DIR/main/full_schema/$SCHEMA_NUMBER/full.sql.sqlite"
+sqlite3 "$SQLITE_MAIN_DB" ".dump --data-only --nosys" >> "$OUTPUT_DIR/main/full_schema/$SCHEMA_NUMBER/full.sql.sqlite"
+sqlite3 "$SQLITE_STATE_DB" ".schema --indent" > "$OUTPUT_DIR/state/full_schema/$SCHEMA_NUMBER/full.sql.sqlite"
+sqlite3 "$SQLITE_STATE_DB" ".dump --data-only --nosys" >> "$OUTPUT_DIR/state/full_schema/$SCHEMA_NUMBER/full.sql.sqlite"
+
+cleanup_pg_schema() {
+  sed -e '/^$/d' -e '/^--/d' -e 's/public\.//g' -e '/^SET /d' -e '/^SELECT /d'
+}

-echo "Dumping Postgres schema to '$OUTPUT_DIR/$POSTGRES_SCHEMA_FILE' and '$OUTPUT_DIR/$POSTGRES_ROWS_FILE'..."
-pg_dump --format=plain --schema-only --no-tablespaces --no-acl --no-owner "$POSTGRES_DB_NAME" | sed -e '/^$/d' -e '/^--/d' -e 's/public\.//g' -e '/^SET /d' -e '/^SELECT /d' > "$OUTPUT_DIR/$POSTGRES_SCHEMA_FILE"
-pg_dump --format=plain --data-only --inserts --no-tablespaces --no-acl --no-owner "$POSTGRES_DB_NAME" | sed -e '/^$/d' -e '/^--/d' -e 's/public\.//g' -e '/^SET /d' -e '/^SELECT /d' > "$OUTPUT_DIR/$POSTGRES_ROWS_FILE"
+echo "Dumping Postgres schema..."

-echo "Cleaning up temporary Postgres database..."
-dropdb $POSTGRES_DB_NAME
+pg_dump --format=plain --schema-only --no-tablespaces --no-acl --no-owner "$POSTGRES_COMMON_DB_NAME" | cleanup_pg_schema > "$OUTPUT_DIR/common/full_schema/$SCHEMA_NUMBER/full.sql.postgres"
+pg_dump --format=plain --data-only --inserts --no-tablespaces --no-acl --no-owner "$POSTGRES_COMMON_DB_NAME" | cleanup_pg_schema >> "$OUTPUT_DIR/common/full_schema/$SCHEMA_NUMBER/full.sql.postgres"
+pg_dump --format=plain --schema-only --no-tablespaces --no-acl --no-owner "$POSTGRES_MAIN_DB_NAME" | cleanup_pg_schema > "$OUTPUT_DIR/main/full_schema/$SCHEMA_NUMBER/full.sql.postgres"
+pg_dump --format=plain --data-only --inserts --no-tablespaces --no-acl --no-owner "$POSTGRES_MAIN_DB_NAME" | cleanup_pg_schema >> "$OUTPUT_DIR/main/full_schema/$SCHEMA_NUMBER/full.sql.postgres"
+pg_dump --format=plain --schema-only --no-tablespaces --no-acl --no-owner "$POSTGRES_STATE_DB_NAME" | cleanup_pg_schema > "$OUTPUT_DIR/state/full_schema/$SCHEMA_NUMBER/full.sql.postgres"
+pg_dump --format=plain --data-only --inserts --no-tablespaces --no-acl --no-owner "$POSTGRES_STATE_DB_NAME" | cleanup_pg_schema >> "$OUTPUT_DIR/state/full_schema/$SCHEMA_NUMBER/full.sql.postgres"

 echo "Done! Files dumped to: $OUTPUT_DIR"
diff --git a/synapse/_scripts/update_synapse_database.py b/synapse/_scripts/update_synapse_database.py
index b4aeae6dd5..fb1fb83f50 100755
--- a/synapse/_scripts/update_synapse_database.py
+++ b/synapse/_scripts/update_synapse_database.py
@@ -48,10 +48,13 @@ class MockHomeserver(HomeServer):

 def run_background_updates(hs: HomeServer) -> None:
-    store = hs.get_datastores().main
+    main = hs.get_datastores().main
+    state = hs.get_datastores().state

     async def run_background_updates() -> None:
-        await store.db_pool.updates.run_background_updates(sleep=False)
+        await main.db_pool.updates.run_background_updates(sleep=False)
+        if state:
+            await state.db_pool.updates.run_background_updates(sleep=False)

         # Stop the reactor to exit the script once every background update is run.
         reactor.stop()
@@ -97,8 +100,11 @@ def main() -> None:

     # Load, process and sanity-check the config.
     hs_config = yaml.safe_load(args.database_config)
-    if "database" not in hs_config:
-        sys.stderr.write("The configuration file must have a 'database' section.\n")
+    if "database" not in hs_config and "databases" not in hs_config:
+        sys.stderr.write(
+            "The configuration file must have a 'database' or 'databases' section. "
+            "See https://matrix-org.github.io/synapse/latest/usage/configuration/config_documentation.html#database"
+        )
         sys.exit(4)

     config = HomeServerConfig()
diff --git a/synapse/storage/background_updates.py b/synapse/storage/background_updates.py
index bf5e7ee7be..2056ecb2c3 100644
--- a/synapse/storage/background_updates.py
+++ b/synapse/storage/background_updates.py
@@ -285,7 +285,10 @@ class BackgroundUpdater:
         back_to_back_failures = 0

         try:
-            logger.info("Starting background schema updates")
+            logger.info(
+                "Starting background schema updates for database %s",
+                self._database_name,
+            )
             while self.enabled:
                 try:
                     result = await self.do_next_background_update(sleep)
-- cgit 1.5.1

From 85fc7ea1a1fb38424923dd1ff117405aea04c33c Mon Sep 17 00:00:00 2001
From: Quentin Gliech
Date: Tue, 20 Sep 2022 15:18:07 +0200
Subject: Remove the `complete_sso_login` method from the Module API which was deprecated in Synapse 1.13.0. (#13843)

Signed-off-by: Quentin Gliech
---
 changelog.d/13843.removal      |  1 +
 synapse/handlers/auth.py       | 34 +---------------------------------
 synapse/module_api/__init__.py | 25 -------------------------
 3 files changed, 2 insertions(+), 58 deletions(-)
 create mode 100644 changelog.d/13843.removal

(limited to 'synapse')

diff --git a/changelog.d/13843.removal b/changelog.d/13843.removal
new file mode 100644
index 0000000000..f6caaa8895
--- /dev/null
+++ b/changelog.d/13843.removal
@@ -0,0 +1 @@
+Remove the `complete_sso_login` method from the Module API which was deprecated in Synapse 1.13.0.
diff --git a/synapse/handlers/auth.py b/synapse/handlers/auth.py
index 0327fc57a4..eacd631ee0 100644
--- a/synapse/handlers/auth.py
+++ b/synapse/handlers/auth.py
@@ -63,7 +63,6 @@ from synapse.http.server import finish_request, respond_with_html
 from synapse.http.site import SynapseRequest
 from synapse.logging.context import defer_to_thread
 from synapse.metrics.background_process_metrics import run_as_background_process
-from synapse.storage.roommember import ProfileInfo
 from synapse.types import JsonDict, Requester, UserID
 from synapse.util import stringutils as stringutils
 from synapse.util.async_helpers import delay_cancellation, maybe_awaitable
@@ -1687,41 +1686,10 @@ class AuthHandler:
             respond_with_html(request, 403, self._sso_account_deactivated_template)
             return

-        profile = await self.store.get_profileinfo(
+        user_profile_data = await self.store.get_profileinfo(
             UserID.from_string(registered_user_id).localpart
         )

-        self._complete_sso_login(
-            registered_user_id,
-            auth_provider_id,
-            request,
-            client_redirect_url,
-            extra_attributes,
-            new_user=new_user,
-            user_profile_data=profile,
-            auth_provider_session_id=auth_provider_session_id,
-        )
-
-    def _complete_sso_login(
-        self,
-        registered_user_id: str,
-        auth_provider_id: str,
-        request: Request,
-        client_redirect_url: str,
-        extra_attributes: Optional[JsonDict] = None,
-        new_user: bool = False,
-        user_profile_data: Optional[ProfileInfo] = None,
-        auth_provider_session_id: Optional[str] = None,
-    ) -> None:
-        """
-        The synchronous portion of complete_sso_login.
-
-        This exists purely for backwards compatibility of synapse.module_api.ModuleApi.
-        """
-
-        if user_profile_data is None:
-            user_profile_data = ProfileInfo(None, None)
-
         # Store any extra attributes which will be passed in the login response.
         # Note that this is per-user so it may overwrite a previous value, this
         # is considered OK since the newest SSO attributes should be most valid.
diff --git a/synapse/module_api/__init__.py b/synapse/module_api/__init__.py
index 87ba154cb7..9287c0fb8d 100644
--- a/synapse/module_api/__init__.py
+++ b/synapse/module_api/__init__.py
@@ -836,31 +836,6 @@ class ModuleApi:
             self._store.db_pool.runInteraction(desc, func, *args, **kwargs)  # type: ignore[arg-type]
         )

-    def complete_sso_login(
-        self, registered_user_id: str, request: SynapseRequest, client_redirect_url: str
-    ) -> None:
-        """Complete a SSO login by redirecting the user to a page to confirm whether they
-        want their access token sent to `client_redirect_url`, or redirect them to that
-        URL with a token directly if the URL matches with one of the whitelisted clients.
-
-        This is deprecated in favor of complete_sso_login_async.
-
-        Added in Synapse v1.11.1.
-
-        Args:
-            registered_user_id: The MXID that has been registered as a previous step of
-                of this SSO login.
-            request: The request to respond to.
-            client_redirect_url: The URL to which to offer to redirect the user (or to
-                redirect them directly if whitelisted).
-        """
-        self._auth_handler._complete_sso_login(
-            registered_user_id,
-            "<unknown>",
-            request,
-            client_redirect_url,
-        )
-
     async def complete_sso_login_async(
         self,
         registered_user_id: str,
-- cgit 1.5.1
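For module authors affected by the last commit above: the removed `complete_sso_login` was a thin synchronous wrapper, and its removed docstring itself names `complete_sso_login_async` as the replacement (the async method is visible at the end of the diff). The sketch below is illustrative only: the module class and its `finish_login` hook are hypothetical, and it assumes the async variant accepts the same leading arguments (user ID, request, client redirect URL) as the removed method.

    from synapse.module_api import ModuleApi


    class ExampleSsoModule:
        """Hypothetical module showing a migration off the removed helper."""

        def __init__(self, config: dict, api: ModuleApi):
            self._api = api

        async def finish_login(self, user_id: str, request, client_redirect_url: str) -> None:
            # Before (deprecated since Synapse 1.13.0, removed by the commit above):
            #     self._api.complete_sso_login(user_id, request, client_redirect_url)
            # After: await the async variant, which handles the same redirect flow.
            await self._api.complete_sso_login_async(
                user_id,
                request,
                client_redirect_url,
            )

Since the old method simply delegated to the auth handler, the practical change for callers is that the call site must now be awaited from an async context.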